const GenRegister sampler = ra->genReg(insn.src(5));
const GenRegister ucoord = ra->genReg(insn.src(6));
const GenRegister vcoord = ra->genReg(insn.src(7));
+ const GenRegister wcoord = ra->genReg(insn.src(8));
const GenRegister temp = GenRegister::ud1grf(msgPayload.nr, msgPayload.subnr/sizeof(float) + 4);
const GenRegister a0_0 = GenRegister::ud1arf(GEN_ARF_ADDRESS, 0);
uint32_t simdWidth = p->curr.execWidth;
/* Prepare message payload. */
p->MOV(GenRegister::f8grf(nr , 0), ucoord);
p->MOV(GenRegister::f8grf(nr + (simdWidth/8), 0), vcoord);
+ if (insn.src(8).reg() != 0)
+ p->MOV(GenRegister::f8grf(nr + (simdWidth/4), 0), wcoord);
p->SAMPLE(dst, msgPayload, a0_0, -1, 0);
p->pop();
const GenRegister bti = ra->genReg(insn.src(0 + insn.extra.elem));
const GenRegister ucoord = ra->genReg(insn.src(1 + insn.extra.elem));
const GenRegister vcoord = ra->genReg(insn.src(2 + insn.extra.elem));
- const GenRegister R = ra->genReg(insn.src(3 + insn.extra.elem));
- const GenRegister G = ra->genReg(insn.src(4 + insn.extra.elem));
- const GenRegister B = ra->genReg(insn.src(5 + insn.extra.elem));
- const GenRegister A = ra->genReg(insn.src(6 + insn.extra.elem));
+ const GenRegister wcoord = ra->genReg(insn.src(3 + insn.extra.elem));
+ const GenRegister R = ra->genReg(insn.src(4 + insn.extra.elem));
+ const GenRegister G = ra->genReg(insn.src(5 + insn.extra.elem));
+ const GenRegister B = ra->genReg(insn.src(6 + insn.extra.elem));
+ const GenRegister A = ra->genReg(insn.src(7 + insn.extra.elem));
const GenRegister a0_0 = GenRegister::ud1arf(GEN_ARF_ADDRESS, 0);
p->push();
GenRegister::retype(GenRegister::QnPhysical(src,quarter), src.type))
QUARTER_MOV0(nr + 1, ucoord);
QUARTER_MOV0(nr + 2, vcoord);
+ if (insn.src(3 + insn.extra.elem).reg() != 0)
+ QUARTER_MOV0(nr + 3, wcoord);
QUARTER_MOV1(nr + 5, R);
QUARTER_MOV1(nr + 6, G);
QUARTER_MOV1(nr + 7, B);
<< " sampler %" << this->getSrc(fn, 1)
<< " coord u %" << this->getSrc(fn, 2)
<< " coord v %" << this->getSrc(fn, 3)
+ << " coord w %" << this->getSrc(fn, 4)
<< " %" << this->getDst(fn, 0)
<< " %" << this->getDst(fn, 1)
<< " %" << this->getDst(fn, 2)
INLINE Type getSrcType(void) const { return this->srcType; }
INLINE Type getDstType(void) const { return this->dstType; }
- static const uint32_t srcNum = 4;
+ static const uint32_t srcNum = 5;
static const uint32_t dstNum = 4;
};
<< " surface id %" << this->getSrc(fn, 0)
<< " coord u %" << this->getSrc(fn, 1)
<< " coord v %" << this->getSrc(fn, 2)
- << " %" << this->getSrc(fn, 3)
+ << " coord w %" << this->getSrc(fn, 3)
<< " %" << this->getSrc(fn, 4)
<< " %" << this->getSrc(fn, 5)
- << " %" << this->getSrc(fn, 6);
+ << " %" << this->getSrc(fn, 6)
+ << " %" << this->getSrc(fn, 7);
}
Tuple src;
INLINE Type getSrcType(void) const { return this->srcType; }
INLINE Type getCoordType(void) const { return this->coordType; }
- // bti, u, v, 4 data elements
- static const uint32_t srcNum = 7;
+ // bti, u, v, w, 4 data elements
+ static const uint32_t srcNum = 8;
Register dst[0]; //!< No dest register
};
case GEN_OCL_WRITE_IMAGE3:
case GEN_OCL_WRITE_IMAGE4:
case GEN_OCL_WRITE_IMAGE5:
+ case GEN_OCL_WRITE_IMAGE10:
+ case GEN_OCL_WRITE_IMAGE11:
+ case GEN_OCL_WRITE_IMAGE12:
+ case GEN_OCL_WRITE_IMAGE13:
+ case GEN_OCL_WRITE_IMAGE14:
+ case GEN_OCL_WRITE_IMAGE15:
break;
case GEN_OCL_READ_IMAGE0:
case GEN_OCL_READ_IMAGE1:
case GEN_OCL_READ_IMAGE3:
case GEN_OCL_READ_IMAGE4:
case GEN_OCL_READ_IMAGE5:
+ case GEN_OCL_READ_IMAGE10:
+ case GEN_OCL_READ_IMAGE11:
+ case GEN_OCL_READ_IMAGE12:
+ case GEN_OCL_READ_IMAGE13:
+ case GEN_OCL_READ_IMAGE14:
+ case GEN_OCL_READ_IMAGE15:
{
// dst is a 4 elements vector. We allocate all 4 registers here.
uint32_t elemNum;
case GEN_OCL_READ_IMAGE3:
case GEN_OCL_READ_IMAGE4:
case GEN_OCL_READ_IMAGE5:
+ case GEN_OCL_READ_IMAGE10:
+ case GEN_OCL_READ_IMAGE11:
+ case GEN_OCL_READ_IMAGE12:
+ case GEN_OCL_READ_IMAGE13:
+ case GEN_OCL_READ_IMAGE14:
+ case GEN_OCL_READ_IMAGE15:
{
GBE_ASSERT(AI != AE); const ir::Register surface_id = this->getRegister(*AI); ++AI;
GBE_ASSERT(AI != AE); const ir::Register sampler = this->getRegister(*AI); ++AI;
GBE_ASSERT(AI != AE); const ir::Register ucoord = this->getRegister(*AI); ++AI;
GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI;
+ // READ_IMAGE10..15 are the variants that pass an additional w coordinate
+ // after v; consume it here. All other variants have no w argument, so
+ // Register(0) is stored as a placeholder (the sampler emit code checks
+ // the w source register against 0 before copying it into the payload).
+ // READ_IMAGE15 must be listed too: leaving it out would drop its w
+ // argument and silently fall back to the placeholder.
+ ir::Register wcoord;
+ if (it->second == GEN_OCL_READ_IMAGE10 ||
+ it->second == GEN_OCL_READ_IMAGE11 ||
+ it->second == GEN_OCL_READ_IMAGE12 ||
+ it->second == GEN_OCL_READ_IMAGE13 ||
+ it->second == GEN_OCL_READ_IMAGE14 ||
+ it->second == GEN_OCL_READ_IMAGE15) {
+ GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI;
+ } else
+ wcoord = ir::Register(0);
vector<ir::Register> dstTupleData, srcTupleData;
const uint32_t elemNum = 4;
srcTupleData.push_back(sampler);
srcTupleData.push_back(ucoord);
srcTupleData.push_back(vcoord);
+ srcTupleData.push_back(wcoord);
const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], elemNum);
- const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 4);
+ const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 5);
ir::Type srcType = ir::TYPE_U32, dstType = ir::TYPE_U32;
switch(it->second) {
case GEN_OCL_READ_IMAGE0:
case GEN_OCL_READ_IMAGE2:
+ case GEN_OCL_READ_IMAGE10:
+ case GEN_OCL_READ_IMAGE12:
srcType = dstType = ir::TYPE_U32;
break;
case GEN_OCL_READ_IMAGE1:
case GEN_OCL_READ_IMAGE3:
+ case GEN_OCL_READ_IMAGE11:
+ case GEN_OCL_READ_IMAGE13:
dstType = ir::TYPE_U32;
srcType = ir::TYPE_FLOAT;
break;
case GEN_OCL_READ_IMAGE4:
+ case GEN_OCL_READ_IMAGE14:
dstType = ir::TYPE_FLOAT;
srcType = ir::TYPE_U32;
break;
case GEN_OCL_READ_IMAGE5:
+ case GEN_OCL_READ_IMAGE15:
srcType = dstType = ir::TYPE_FLOAT;
break;
default:
case GEN_OCL_WRITE_IMAGE3:
case GEN_OCL_WRITE_IMAGE4:
case GEN_OCL_WRITE_IMAGE5:
+ case GEN_OCL_WRITE_IMAGE10:
+ case GEN_OCL_WRITE_IMAGE11:
+ case GEN_OCL_WRITE_IMAGE12:
+ case GEN_OCL_WRITE_IMAGE13:
+ case GEN_OCL_WRITE_IMAGE14:
+ case GEN_OCL_WRITE_IMAGE15:
{
GBE_ASSERT(AI != AE); const ir::Register surface_id = this->getRegister(*AI); ++AI;
GBE_ASSERT(AI != AE); const ir::Register ucoord = this->getRegister(*AI); ++AI;
GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI;
+ // WRITE_IMAGE10..15 are the variants that pass an additional w coordinate
+ // after v; consume it here so that AI ends up on the colour vector read
+ // below. All other variants have no w argument, so Register(0) is stored
+ // as a placeholder (the typed-write emit code checks the w source
+ // register against 0 before copying it into the payload).
+ // WRITE_IMAGE15 must be listed too: leaving it out would leave AI on the
+ // w argument and the colour elements would be fetched from the wrong value.
+ ir::Register wcoord;
+ if(it->second == GEN_OCL_WRITE_IMAGE10 ||
+ it->second == GEN_OCL_WRITE_IMAGE11 ||
+ it->second == GEN_OCL_WRITE_IMAGE12 ||
+ it->second == GEN_OCL_WRITE_IMAGE13 ||
+ it->second == GEN_OCL_WRITE_IMAGE14 ||
+ it->second == GEN_OCL_WRITE_IMAGE15) {
+ GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI;
+ } else
+ wcoord = ir::Register(0);
GBE_ASSERT(AI != AE);
vector<ir::Register> srcTupleData;
srcTupleData.push_back(surface_id);
srcTupleData.push_back(ucoord);
srcTupleData.push_back(vcoord);
+ srcTupleData.push_back(wcoord);
const uint32_t elemNum = 4;
for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
const ir::Register reg = this->getRegister(*AI, elemID);
srcTupleData.push_back(reg);
}
- const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 7);
+ const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 8);
ir::Type srcType = ir::TYPE_U32, coordType = ir::TYPE_U32;
switch(it->second) {
case GEN_OCL_WRITE_IMAGE0:
case GEN_OCL_WRITE_IMAGE2:
+ case GEN_OCL_WRITE_IMAGE10:
+ case GEN_OCL_WRITE_IMAGE12:
srcType = coordType = ir::TYPE_U32;
break;
case GEN_OCL_WRITE_IMAGE1:
case GEN_OCL_WRITE_IMAGE3:
+ case GEN_OCL_WRITE_IMAGE11:
+ case GEN_OCL_WRITE_IMAGE13:
coordType = ir::TYPE_FLOAT;
srcType = ir::TYPE_U32;
break;
case GEN_OCL_WRITE_IMAGE4:
+ case GEN_OCL_WRITE_IMAGE14:
srcType = ir::TYPE_FLOAT;
coordType = ir::TYPE_U32;
break;
case GEN_OCL_WRITE_IMAGE5:
+ case GEN_OCL_WRITE_IMAGE15:
srcType = coordType = ir::TYPE_FLOAT;
break;
default: