this->kernel = NULL;
}
if(this->kernel != NULL) {
- // Align it on 32 bytes properly
- this->kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE);
this->kernel->scratchSize = alignScratchSize(this->scratchOffset);
this->kernel->ctx = this;
}
offset = kernel->getCurbeOffset(GBE_CURBE_IMAGE_INFO, key.data);
GBE_ASSERT(offset >= 0); // XXX do we need to spill it out to bo?
fn.getImageSet()->appendInfo(key, offset);
- return offset;
+ return offset + GEN_REG_SIZE;
}
insertCurbeReg(ir::ocl::lid0, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_X, 0, localIDSize));
insertCurbeReg(ir::ocl::lid1, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Y, 0, localIDSize));
insertCurbeReg(ir::ocl::lid2, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Z, 0, localIDSize));
- insertCurbeReg(ir::ocl::samplerinfo, this->newCurbeEntry(GBE_CURBE_SAMPLER_INFO, 0, 32));
+ insertCurbeReg(ir::ocl::samplerinfo, this->newCurbeEntry(GBE_CURBE_SAMPLER_INFO, 0, 32));
// Go over all the instructions and find the special register we need
// to push
} else
bool useStackPtr = false;
- fn.foreachInstruction([&](const ir::Instruction &insn) {
+ fn.foreachInstruction([&](ir::Instruction &insn) {
const uint32_t srcNum = insn.getSrcNum();
for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
const ir::Register reg = insn.getSrc(srcID);
+ if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) {
+ if (srcID != 0) continue;
+ const unsigned char bti = fn.getImageSet()->getIdx(insn.getSrc(srcID));
+ const unsigned char type = ir::cast<ir::GetImageInfoInstruction>(insn).getInfoType();;
+ ir::ImageInfoKey key;
+ key.index = bti;
+ key.type = type;
+ const ir::Register imageInfo(key.data | 0x8000);
+ ir::Register realImageInfo;
+ if (curbeRegs.find(imageInfo) == curbeRegs.end()) {
+ uint32_t offset = this->getImageInfoCurbeOffset(key, 4);
+ realImageInfo = insn.getSrc(1);
+ insertCurbeReg(realImageInfo, offset);
+ insertCurbeReg(imageInfo, (uint32_t)realImageInfo);
+ } else
+ realImageInfo = ir::Register(curbeRegs.find(imageInfo)->second);
+ insn.setSrc(srcID, realImageInfo);
+ continue;
+ } else if (insn.getOpcode() == ir::OP_GET_SAMPLER_INFO) {
+ /* change the src to sampler information register. */
+ if (curbeRegs.find(ir::ocl::samplerinfo) == curbeRegs.end())
+ insertCurbeReg(ir::ocl::samplerinfo, this->newCurbeEntry(GBE_CURBE_SAMPLER_INFO, 0, 32));
+ continue;
+ }
if (fn.isSpecialReg(reg) == false) continue;
if (curbeRegs.find(reg) != curbeRegs.end()) continue;
if (reg == ir::ocl::stackptr) useStackPtr = true;
INSERT_REG(numgroup1, GROUP_NUM_Y, 1)
INSERT_REG(numgroup2, GROUP_NUM_Z, 1)
INSERT_REG(stackptr, STACK_POINTER, this->simdWidth)
- do {} while (0);
+ do {} while(0);
}
});
#undef INSERT_REG
INLINE bool emitOne(Selection::Opaque &sel, const ir::GetImageInfoInstruction &insn) const
{
using namespace ir;
- const uint32_t infoType = insn.getInfoType();
- GenRegister dst[4];
- uint32_t dstNum = ir::GetImageInfoInstruction::getDstNum4Type(infoType);
- for (uint32_t valueID = 0; valueID < dstNum; ++valueID)
- dst[valueID] = sel.selReg(insn.getDst(valueID), TYPE_U32);
- uint32_t bti = sel.ctx.getFunction().getImageSet()->getIdx
- (insn.getSrc(0));
- sel.GET_IMAGE_INFO(infoType, dst, dstNum, bti);
+ GenRegister dst;
+ dst = sel.selReg(insn.getDst(0), TYPE_U32);
+ GenRegister imageInfoReg = GenRegister::ud1grf(insn.getSrc(0));
+ sel.MOV(dst, imageInfoReg);
+
return true;
}
DECL_CTOR(GetImageInfoInstruction, 1, 1);
INLINE void GenRegAllocator::Opaque::allocatePayloadRegs(void) {
using namespace ir;
for(auto &it : this->ctx.curbeRegs)
- allocatePayloadReg(it.first, it.second);
+ if (it.first.value() < 0x8000)
+ allocatePayloadReg(it.first, it.second);
// Allocate all pushed registers (i.e. structure kernel arguments)
const Function &fn = ctx.getFunction();
class ALIGNED_INSTRUCTION GetImageInfoInstruction :
public BasePolicy,
- public NSrcPolicy<GetImageInfoInstruction, 1>,
- public TupleDstPolicy<GetImageInfoInstruction>
+ public NSrcPolicy<GetImageInfoInstruction, 2>,
+ public NDstPolicy<GetImageInfoInstruction, 1>
{
public:
GetImageInfoInstruction( int type,
- Tuple dst,
- Register src)
+ Register dst,
+ Register src,
+ Register infoReg)
{
this->opcode = OP_GET_IMAGE_INFO;
this->infoType = type;
- this->dst = dst;
+ this->dst[0] = dst;
this->src[0] = src;
+ this->src[1] = infoReg;
}
INLINE uint32_t getInfoType(void) const { return infoType; }
}
uint8_t infoType; //!< Type of the requested information.
- Register src[1]; //!< Surface to get info
- Tuple dst; //!< dest register to put the information.
- static const uint32_t dstNum = 4; //! The maximum dst number. Not the actual number
- // of destination tuple. We use the infoType to determin
- // the actual num.
+ Register src[2]; //!< Surface to get info
+ Register dst[1]; //!< dest register to put the information.
+ static const uint32_t dstNum = 1;
};
class ALIGNED_INSTRUCTION LoadImmInstruction :
return internal::TypedWriteInstruction(src, srcType, coordType).convert();
}
- Instruction GET_IMAGE_INFO(int infoType, Tuple dst, Register src) {
- return internal::GetImageInfoInstruction(infoType, dst, src).convert();
+ Instruction GET_IMAGE_INFO(int infoType, Register dst, Register src, Register infoReg) {
+ return internal::GetImageInfoInstruction(infoType, dst, src, infoReg).convert();
}
Instruction GET_SAMPLER_INFO(Register dst, Register src) {
typedef union {
struct {
- uint16_t index; /*! the allocated image index */
- uint16_t type; /*! the information type */
+ uint8_t index; /*! the allocated image index */
+ uint8_t type; /*! the information type */
};
uint32_t data;
} ImageInfoKey;
/*! sample textures */
Instruction SAMPLE(Tuple dst, Tuple src, Type dstType, Type srcType);
/*! get image information , such as width/height/depth/... */
- Instruction GET_IMAGE_INFO(int infoType, Tuple dst, Register src);
+ Instruction GET_IMAGE_INFO(int infoType, Register dst, Register src, Register infoReg);
/*! get sampler information */
Instruction GET_SAMPLER_INFO(Register dst, Register src);
/*! label labelIndex */
GBE_ASSERT(AI != AE); const ir::Register surface_id = this->getRegister(*AI); ++AI;
uint32_t elemNum;
(void)getVectorInfo(ctx, I.getType(), &I, elemNum);
- vector<ir::Register> dstTupleData;
- ir::Register lastReg;
- for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
- const ir::Register reg = this->getRegister(&I, elemID);
- dstTupleData.push_back(reg);
- lastReg = reg;
- }
- // A walk around for the gen IR limitation.
- for (uint32_t elemID = elemNum; elemID < 4; ++ elemID) {
- dstTupleData.push_back(lastReg);
- }
- const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], 4);
+ const ir::Register reg = this->getRegister(&I, 0);
int infoType = it->second - GEN_OCL_GET_IMAGE_WIDTH;
- ctx.GET_IMAGE_INFO(infoType, dstTuple, surface_id);
+ ctx.GET_IMAGE_INFO(infoType, reg, surface_id, ctx.reg(ir::FAMILY_DWORD));
break;
}
case GEN_OCL_GET_SAMPLER_INFO:
for (offset.y = -1; offset.y <= 1; offset.y++) {
for (offset.x = -1; offset.x <= 1; offset.x++) {
- sum += read_imagef(src, sampler, coord + offset);
+ sum += read_imagef(src, sampler, coord + offset);
}
}
/* Upload sampler information. */
offset = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_SAMPLER_INFO, 0);
- uint32_t i;
- for(i = 0; i < ker->sampler_sz; i++, offset += 2) {
- *((uint16_t *) (ker->curbe + offset)) = ker->samplers[i] & 0xFF;
+ if (offset >= 0) {
+ uint32_t i;
+ for(i = 0; i < ker->sampler_sz; i++, offset += 2) {
+ *((uint16_t *) (ker->curbe + offset)) = ker->samplers[i] & 0xFF;
+ }
}
/* Write identity for the stack pointer. This is required by the stack pointer