From: Zhigang Gong Date: Wed, 25 Sep 2013 09:00:16 +0000 (+0800) Subject: GBE: Refine the curbe entry allocation for sampler/image information. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=187c17ed9f87a7cf235c7ca08823f956d9b06646;p=contrib%2Fbeignet.git GBE: Refine the curbe entry allocation for sampler/image information. After the previous patch, we can move the image infomation curbe entry allocation to prior to the instruction selection. Then we can concentrate all curbe allocation before we do the normal register allocation. This way can bring two advantages: 1. Avoid the image information curbe entry is allocated among the normal registers. 2. The register interval analyzing could handle the image/sampler information correctly. Signed-off-by: Zhigang Gong Reviewed-by: "Yang, Rong R" --- diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp index 2cacd07..cbd38f1 100644 --- a/backend/src/backend/context.cpp +++ b/backend/src/backend/context.cpp @@ -316,8 +316,6 @@ namespace gbe this->kernel = NULL; } if(this->kernel != NULL) { - // Align it on 32 bytes properly - this->kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE); this->kernel->scratchSize = alignScratchSize(this->scratchOffset); this->kernel->ctx = this; } @@ -390,7 +388,7 @@ namespace gbe offset = kernel->getCurbeOffset(GBE_CURBE_IMAGE_INFO, key.data); GBE_ASSERT(offset >= 0); // XXX do we need to spill it out to bo? fn.getImageSet()->appendInfo(key, offset); - return offset; + return offset + GEN_REG_SIZE; } @@ -425,7 +423,7 @@ namespace gbe insertCurbeReg(ir::ocl::lid0, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_X, 0, localIDSize)); insertCurbeReg(ir::ocl::lid1, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Y, 0, localIDSize)); insertCurbeReg(ir::ocl::lid2, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Z, 0, localIDSize)); - insertCurbeReg(ir::ocl::samplerinfo, this->newCurbeEntry(GBE_CURBE_SAMPLER_INFO, 0, 32)); + insertCurbeReg(ir::ocl::samplerinfo, this->newCurbeEntry(GBE_CURBE_SAMPLER_INFO, 0, 32)); // Go over all the instructions and find the special register we need // to push @@ -436,10 +434,34 @@ namespace gbe } else bool useStackPtr = false; - fn.foreachInstruction([&](const ir::Instruction &insn) { + fn.foreachInstruction([&](ir::Instruction &insn) { const uint32_t srcNum = insn.getSrcNum(); for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { const ir::Register reg = insn.getSrc(srcID); + if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) { + if (srcID != 0) continue; + const unsigned char bti = fn.getImageSet()->getIdx(insn.getSrc(srcID)); + const unsigned char type = ir::cast(insn).getInfoType();; + ir::ImageInfoKey key; + key.index = bti; + key.type = type; + const ir::Register imageInfo(key.data | 0x8000); + ir::Register realImageInfo; + if (curbeRegs.find(imageInfo) == curbeRegs.end()) { + uint32_t offset = this->getImageInfoCurbeOffset(key, 4); + realImageInfo = insn.getSrc(1); + insertCurbeReg(realImageInfo, offset); + insertCurbeReg(imageInfo, (uint32_t)realImageInfo); + } else + realImageInfo = ir::Register(curbeRegs.find(imageInfo)->second); + insn.setSrc(srcID, realImageInfo); + continue; + } else if (insn.getOpcode() == ir::OP_GET_SAMPLER_INFO) { + /* change the src to sampler information register. */ + if (curbeRegs.find(ir::ocl::samplerinfo) == curbeRegs.end()) + insertCurbeReg(ir::ocl::samplerinfo, this->newCurbeEntry(GBE_CURBE_SAMPLER_INFO, 0, 32)); + continue; + } if (fn.isSpecialReg(reg) == false) continue; if (curbeRegs.find(reg) != curbeRegs.end()) continue; if (reg == ir::ocl::stackptr) useStackPtr = true; @@ -457,7 +479,7 @@ namespace gbe INSERT_REG(numgroup1, GROUP_NUM_Y, 1) INSERT_REG(numgroup2, GROUP_NUM_Z, 1) INSERT_REG(stackptr, STACK_POINTER, this->simdWidth) - do {} while (0); + do {} while(0); } }); #undef INSERT_REG diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index d9ea7ff..bd52885 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -2820,14 +2820,11 @@ namespace gbe INLINE bool emitOne(Selection::Opaque &sel, const ir::GetImageInfoInstruction &insn) const { using namespace ir; - const uint32_t infoType = insn.getInfoType(); - GenRegister dst[4]; - uint32_t dstNum = ir::GetImageInfoInstruction::getDstNum4Type(infoType); - for (uint32_t valueID = 0; valueID < dstNum; ++valueID) - dst[valueID] = sel.selReg(insn.getDst(valueID), TYPE_U32); - uint32_t bti = sel.ctx.getFunction().getImageSet()->getIdx - (insn.getSrc(0)); - sel.GET_IMAGE_INFO(infoType, dst, dstNum, bti); + GenRegister dst; + dst = sel.selReg(insn.getDst(0), TYPE_U32); + GenRegister imageInfoReg = GenRegister::ud1grf(insn.getSrc(0)); + sel.MOV(dst, imageInfoReg); + return true; } DECL_CTOR(GetImageInfoInstruction, 1, 1); diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp index a72333d..ab8b7ee 100644 --- a/backend/src/backend/gen_reg_allocation.cpp +++ b/backend/src/backend/gen_reg_allocation.cpp @@ -142,7 +142,8 @@ namespace gbe INLINE void GenRegAllocator::Opaque::allocatePayloadRegs(void) { using namespace ir; for(auto &it : this->ctx.curbeRegs) - allocatePayloadReg(it.first, it.second); + if (it.first.value() < 0x8000) + allocatePayloadReg(it.first, it.second); // Allocate all pushed registers (i.e. structure kernel arguments) const Function &fn = ctx.getFunction(); diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index 0278bc6..7c6c6c6 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -556,18 +556,20 @@ namespace ir { class ALIGNED_INSTRUCTION GetImageInfoInstruction : public BasePolicy, - public NSrcPolicy, - public TupleDstPolicy + public NSrcPolicy, + public NDstPolicy { public: GetImageInfoInstruction( int type, - Tuple dst, - Register src) + Register dst, + Register src, + Register infoReg) { this->opcode = OP_GET_IMAGE_INFO; this->infoType = type; - this->dst = dst; + this->dst[0] = dst; this->src[0] = src; + this->src[1] = infoReg; } INLINE uint32_t getInfoType(void) const { return infoType; } @@ -580,11 +582,9 @@ namespace ir { } uint8_t infoType; //!< Type of the requested information. - Register src[1]; //!< Surface to get info - Tuple dst; //!< dest register to put the information. - static const uint32_t dstNum = 4; //! The maximum dst number. Not the actual number - // of destination tuple. We use the infoType to determin - // the actual num. + Register src[2]; //!< Surface to get info + Register dst[1]; //!< dest register to put the information. + static const uint32_t dstNum = 1; }; class ALIGNED_INSTRUCTION LoadImmInstruction : @@ -1528,8 +1528,8 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType()) return internal::TypedWriteInstruction(src, srcType, coordType).convert(); } - Instruction GET_IMAGE_INFO(int infoType, Tuple dst, Register src) { - return internal::GetImageInfoInstruction(infoType, dst, src).convert(); + Instruction GET_IMAGE_INFO(int infoType, Register dst, Register src, Register infoReg) { + return internal::GetImageInfoInstruction(infoType, dst, src, infoReg).convert(); } Instruction GET_SAMPLER_INFO(Register dst, Register src) { diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 3697c17..27a34d1 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -360,8 +360,8 @@ namespace ir { typedef union { struct { - uint16_t index; /*! the allocated image index */ - uint16_t type; /*! the information type */ + uint8_t index; /*! the allocated image index */ + uint8_t type; /*! the information type */ }; uint32_t data; } ImageInfoKey; @@ -644,7 +644,7 @@ namespace ir { /*! sample textures */ Instruction SAMPLE(Tuple dst, Tuple src, Type dstType, Type srcType); /*! get image information , such as width/height/depth/... */ - Instruction GET_IMAGE_INFO(int infoType, Tuple dst, Register src); + Instruction GET_IMAGE_INFO(int infoType, Register dst, Register src, Register infoReg); /*! get sampler information */ Instruction GET_SAMPLER_INFO(Register dst, Register src); /*! label labelIndex */ diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 27263f8..8b73ac9 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -2097,21 +2097,10 @@ namespace gbe GBE_ASSERT(AI != AE); const ir::Register surface_id = this->getRegister(*AI); ++AI; uint32_t elemNum; (void)getVectorInfo(ctx, I.getType(), &I, elemNum); - vector dstTupleData; - ir::Register lastReg; - for (uint32_t elemID = 0; elemID < elemNum; ++elemID) { - const ir::Register reg = this->getRegister(&I, elemID); - dstTupleData.push_back(reg); - lastReg = reg; - } - // A walk around for the gen IR limitation. - for (uint32_t elemID = elemNum; elemID < 4; ++ elemID) { - dstTupleData.push_back(lastReg); - } - const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], 4); + const ir::Register reg = this->getRegister(&I, 0); int infoType = it->second - GEN_OCL_GET_IMAGE_WIDTH; - ctx.GET_IMAGE_INFO(infoType, dstTuple, surface_id); + ctx.GET_IMAGE_INFO(infoType, reg, surface_id, ctx.reg(ir::FAMILY_DWORD)); break; } case GEN_OCL_GET_SAMPLER_INFO: diff --git a/kernels/compiler_box_blur_image.cl b/kernels/compiler_box_blur_image.cl index 7bcbdeb..42f463b 100644 --- a/kernels/compiler_box_blur_image.cl +++ b/kernels/compiler_box_blur_image.cl @@ -10,7 +10,7 @@ __kernel void compiler_box_blur_image(__read_only image2d_t src, for (offset.y = -1; offset.y <= 1; offset.y++) { for (offset.x = -1; offset.x <= 1; offset.x++) { - sum += read_imagef(src, sampler, coord + offset); + sum += read_imagef(src, sampler, coord + offset); } } diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c index f2c051b..b85c0cd 100644 --- a/src/cl_command_queue_gen7.c +++ b/src/cl_command_queue_gen7.c @@ -182,9 +182,11 @@ cl_curbe_fill(cl_kernel ker, /* Upload sampler information. */ offset = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_SAMPLER_INFO, 0); - uint32_t i; - for(i = 0; i < ker->sampler_sz; i++, offset += 2) { - *((uint16_t *) (ker->curbe + offset)) = ker->samplers[i] & 0xFF; + if (offset >= 0) { + uint32_t i; + for(i = 0; i < ker->sampler_sz; i++, offset += 2) { + *((uint16_t *) (ker->curbe + offset)) = ker->samplers[i] & 0xFF; + } } /* Write identity for the stack pointer. This is required by the stack pointer