namespace gbe
{
-
IVAR(OCL_SIMD_WIDTH, 8, 16, 32);
Context::Context(const ir::Unit &unit, const std::string &name) :
}
}
+ // Already inserted registers go here
+ set<ir::Register> specialRegs;
+
+ // We insert the block IP mask first
+ kernel->patches.push_back(PatchInfo(GBE_CURBE_BLOCK_IP, 0, kernel->curbeSize));
+ kernel->curbeSize += this->simdWidth * sizeof(uint16_t);
+
+ // Then the local IDs (not scalar, so we align them properly)
+ kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE);
+ if (this->simdWidth == 16 || this->simdWidth == 32)
+ if ((kernel->curbeSize + GEN_REG_SIZE) % (2*GEN_REG_SIZE) != 0)
+ kernel->curbeSize += GEN_REG_SIZE;
+ const size_t localIDSize = sizeof(uint32_t) * this->simdWidth;
+ const PatchInfo lid0(GBE_CURBE_LOCAL_ID_X, 0, kernel->curbeSize);
+ kernel->curbeSize += localIDSize;
+ const PatchInfo lid1(GBE_CURBE_LOCAL_ID_Y, 0, kernel->curbeSize);
+ kernel->curbeSize += localIDSize;
+ const PatchInfo lid2(GBE_CURBE_LOCAL_ID_Z, 0, kernel->curbeSize);
+ kernel->curbeSize += localIDSize;
+ kernel->patches.push_back(lid0);
+ kernel->patches.push_back(lid1);
+ kernel->patches.push_back(lid2);
+ specialRegs.insert(ir::ocl::lid0);
+ specialRegs.insert(ir::ocl::lid1);
+ specialRegs.insert(ir::ocl::lid2);
+
// Go over all the instructions and find the special register value we need
// to push
-#define INSERT_REG(SPECIAL_REG, PATCH) \
- if (reg == ir::ocl::SPECIAL_REG) { \
- if (specialRegs.find(reg) != specialRegs.end()) continue; \
+#define INSERT_REG(SPECIAL_REG, PATCH) \
+ if (reg == ir::ocl::SPECIAL_REG) { \
+ if (specialRegs.find(reg) != specialRegs.end()) continue; \
const PatchInfo patch(GBE_CURBE_##PATCH, 0, kernel->curbeSize); \
- kernel->patches.push_back(patch); \
- kernel->curbeSize += ptrSize; \
+ kernel->patches.push_back(patch); \
+ kernel->curbeSize += ptrSize; \
} else
- set<ir::Register> specialRegs; // already inserted registers
fn.foreachInstruction([&](const ir::Instruction &insn) {
- // Special registers are immutable. So only check sources
const uint32_t srcNum = insn.getSrcNum();
for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
const ir::Register reg = insn.getSrc(srcID);
specialRegs.insert(reg);
}
});
-
- kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE);
- if (this->simdWidth == 16)
- if ((kernel->curbeSize + GEN_REG_SIZE) % (2*GEN_REG_SIZE) != 0)
- kernel->curbeSize += GEN_REG_SIZE;
-
- // Local IDs always go at the end of the curbe
- const size_t localIDSize = sizeof(uint32_t) * this->simdWidth;
- const PatchInfo lid0(GBE_CURBE_LOCAL_ID_X, 0, kernel->curbeSize+0*localIDSize);
- const PatchInfo lid1(GBE_CURBE_LOCAL_ID_Y, 0, kernel->curbeSize+1*localIDSize);
- const PatchInfo lid2(GBE_CURBE_LOCAL_ID_Z, 0, kernel->curbeSize+2*localIDSize);
- kernel->patches.push_back(lid0);
- kernel->patches.push_back(lid1);
- kernel->patches.push_back(lid2);
+#undef INSERT_REG
// After this point the vector is immutable. Sorting it will make
// research faster
std::sort(kernel->patches.begin(), kernel->patches.end());
+
+ // Align it on 128 bytes properly
+ kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE);
}
void Context::buildArgList(void) {
bool Context::isScalarReg(const ir::Register ®) const {
GBE_ASSERT(fn.getProfile() == ir::Profile::PROFILE_OCL);
- if (fn.getInput(reg) != NULL)
- return true;
+ if (fn.getInput(reg) != NULL) return true;
if (reg == ir::ocl::groupid0 ||
reg == ir::ocl::groupid1 ||
reg == ir::ocl::groupid2 ||
void GenContext::emitUnaryInstruction(const ir::UnaryInstruction &insn) {
GBE_ASSERT(insn.getOpcode() == ir::OP_MOV);
p->MOV(reg(insn.getDst(0)), reg(insn.getSrc(0)));
- //p->MOV(GenReg::retype(reg(insn.getDst(0)), GEN_TYPE_UD), GenReg::retype(reg(insn.getSrc(0)), GEN_TYPE_UD));
}
void GenContext::emitBinaryInstruction(const ir::BinaryInstruction &insn) {
const GenReg value = reg(insn.getValue(0));
// XXX remove that later. Now we just copy everything to GRFs to make it
// contiguous
-#if 1
if (this->simdWidth == 8) {
p->MOV(GenReg::vec8grf(112, 0), GenReg::retype(address, GEN_TYPE_F));
p->MOV(GenReg::vec8grf(113, 0), GenReg::retype(value, GEN_TYPE_F));
p->UNTYPED_WRITE(GenReg::vec16grf(112, 0), 0, 1);
} else
NOT_IMPLEMENTED;
-#else
- if (this->simdWidth == 8) {
- p->MOV(GenReg::ud8grf(112, 0), GenReg::retype(address, GEN_TYPE_UD));
- p->MOV(GenReg::ud8grf(113, 0), GenReg::retype(value, GEN_TYPE_UD));
- p->UNTYPED_WRITE(GenReg::vec8grf(112, 0), 0, 1);
- } else if (this->simdWidth == 16) {
- p->MOV(GenReg::ud16grf(112, 0), GenReg::retype(address, GEN_TYPE_UD));
- p->MOV(GenReg::ud16grf(114, 0), GenReg::retype(value, GEN_TYPE_UD));
- p->UNTYPED_WRITE(GenReg::vec16grf(112, 0), 0, 1);
- } else
- NOT_IMPLEMENTED;
-
-#endif
}
void GenContext::emitFenceInstruction(const ir::FenceInstruction &insn) {}
void GenContext::emitLabelInstruction(const ir::LabelInstruction &insn) {}
static INLINE GenReg vec16(uint32_t file, uint32_t nr, uint32_t subnr) {
return GenReg(file,
- nr,
- subnr,
- GEN_TYPE_F,
- GEN_VERTICAL_STRIDE_8,
- GEN_WIDTH_8,
- GEN_HORIZONTAL_STRIDE_1,
- GEN_SWIZZLE_XYZW,
- WRITEMASK_XYZW);
+ nr,
+ subnr,
+ GEN_TYPE_F,
+ GEN_VERTICAL_STRIDE_8,
+ GEN_WIDTH_8,
+ GEN_HORIZONTAL_STRIDE_1,
+ GEN_SWIZZLE_XYZW,
+ WRITEMASK_XYZW);
}
static INLINE GenReg vec8(uint32_t file, uint32_t nr, uint32_t subnr) {
return GenReg(file,
- nr,
- subnr,
- GEN_TYPE_F,
- GEN_VERTICAL_STRIDE_8,
- GEN_WIDTH_8,
- GEN_HORIZONTAL_STRIDE_1,
- GEN_SWIZZLE_XYZW,
- WRITEMASK_XYZW);
+ nr,
+ subnr,
+ GEN_TYPE_F,
+ GEN_VERTICAL_STRIDE_8,
+ GEN_WIDTH_8,
+ GEN_HORIZONTAL_STRIDE_1,
+ GEN_SWIZZLE_XYZW,
+ WRITEMASK_XYZW);
}
static INLINE GenReg vec4(uint32_t file, uint32_t nr, uint32_t subnr) {
return GenReg(file,
- nr,
- subnr,
- GEN_TYPE_F,
- GEN_VERTICAL_STRIDE_4,
- GEN_WIDTH_4,
- GEN_HORIZONTAL_STRIDE_1,
- GEN_SWIZZLE_XYZW,
- WRITEMASK_XYZW);
+ nr,
+ subnr,
+ GEN_TYPE_F,
+ GEN_VERTICAL_STRIDE_4,
+ GEN_WIDTH_4,
+ GEN_HORIZONTAL_STRIDE_1,
+ GEN_SWIZZLE_XYZW,
+ WRITEMASK_XYZW);
}
static INLINE GenReg vec2(uint32_t file, uint32_t nr, uint32_t subnr) {