GenReg genReg = this->genReg(reg, type);
if (this->isScalarReg(reg) == true)
return genReg;
- else {
- const uint32_t elemSz = typeSize(genReg.file);
- const uint32_t grfOffset = genReg.nr*GEN_REG_SIZE + elemSz*genReg.subnr;
- const uint32_t nextOffset = grfOffset + 8*(quarter-1)*elemSz;
- genReg.nr = nextOffset / GEN_REG_SIZE;
- genReg.subnr = (nextOffset % GEN_REG_SIZE) / elemSz;
- return genReg;
- }
+ else
+ return GenReg::Qn(genReg, quarter);
}
// Per-lane block IPs are always pre-allocated and used for branches. We just
p->push();
p->curr.flag = 0;
p->curr.subFlag = 1;
- p->CMP(GenReg::null(), GEN_CONDITIONAL_EQ, pred, GenReg::immuw(1));
+ p->CMP(GEN_CONDITIONAL_EQ, pred, GenReg::immuw(1));
// Update the PcIPs
p->MOV(ip, GenReg::immuw(uint16_t(dst)));
p->curr.predicate = GEN_PREDICATE_NONE;
p->curr.flag = 0;
p->curr.subFlag = 1;
- p->CMP(GenReg::null(), GEN_CONDITIONAL_G, ip, GenReg::immuw(nextLabel));
+ p->CMP(GEN_CONDITIONAL_G, ip, GenReg::immuw(nextLabel));
// Branch to the jump target
this->branchPos.insert(std::make_pair(&insn, p->insnNum));
p->push();
p->curr.flag = 0;
p->curr.subFlag = 1;
- p->CMP(GenReg::null(), GEN_CONDITIONAL_EQ, pred, GenReg::immuw(1));
+ p->CMP(GEN_CONDITIONAL_EQ, pred, GenReg::immuw(1));
// Update the PcIPs
p->MOV(ip, GenReg::immuw(uint16_t(dst)));
using namespace ir;
const Opcode opcode = insn.getOpcode();
const Type type = insn.getType();
- const RegisterFamily family = getFamily(type);
- const uint32_t typeSize = familySize[family];
const uint32_t genCmp = getGenCompare(opcode);
const GenReg dst = this->genReg(insn.getDst(0), TYPE_BOOL);
const GenReg src0 = this->genReg(insn.getSrc(0), type);
p->MOV(GenReg::flag(0,1), GenReg::flag(0,0));
p->pop();
- // Emit the compare instruction now. dwords require to push two SIMD8
- // instructions
- GBE_ASSERT(typeSize == 2 || typeSize == 4);
+ // Emit the compare instruction itself
p->push();
- if (this->simdWidth == 8 || typeSize == 2) {
- p->curr.flag = 0;
- p->curr.subFlag = 1;
- p->CMP(GenReg::null(), genCmp, src0, src1);
- } else if (this->simdWidth == 16) {
- const GenReg nextSrc0 = this->genRegQn(insn.getSrc(0), 2, type);
- const GenReg nextSrc1 = this->genRegQn(insn.getSrc(1), 2, type);
- p->curr.flag = 0;
- p->curr.subFlag = 1;
- p->curr.execWidth = 8;
- p->curr.quarterControl = GEN_COMPRESSION_Q1;
- p->CMP(GenReg::null(), genCmp, src0, src1);
- p->curr.quarterControl = GEN_COMPRESSION_Q2;
- p->CMP(GenReg::null(), genCmp, nextSrc0, nextSrc1);
- } else
- NOT_SUPPORTED;
+ p->curr.flag = 0;
+ p->curr.subFlag = 1;
+ p->CMP(genCmp, src0, src1);
p->pop();
// We emit a very unoptimized code where we store the resulting mask in a GRF
// We need two steps here to make the conversion
if (dstFamily != FAMILY_DWORD && srcFamily == FAMILY_DWORD) {
GenReg unpacked;
- const uint32_t vstride = simdWidth == 8 ? GEN_VERTICAL_STRIDE_8 : GEN_VERTICAL_STRIDE_16;
+ // Keep both branches braced. The byte branch holds two statements (the
+ // assertion and the assignment): without braces, only the assertion
+ // would belong to the else and the byte layout would always overwrite
+ // the word layout chosen just above.
if (dstFamily == FAMILY_WORD) {
unpacked = GenReg(GEN_GENERAL_REGISTER_FILE,
112,
0,
dstType == TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W,
- vstride,
+ GEN_VERTICAL_STRIDE_16,
GEN_WIDTH_8,
GEN_HORIZONTAL_STRIDE_2);
} else {
GBE_ASSERT(dstFamily == FAMILY_BYTE);
unpacked = GenReg(GEN_GENERAL_REGISTER_FILE,
112,
0,
dstType == TYPE_U8 ? GEN_TYPE_UB : GEN_TYPE_B,
- vstride,
+ GEN_VERTICAL_STRIDE_32,
GEN_WIDTH_8,
GEN_HORIZONTAL_STRIDE_4);
}
p->MOV(unpacked, src);
p->MOV(dst, unpacked);
- } else {
+ } else
p->MOV(dst, src);
- }
}
void GenContext::emitBranchInstruction(const ir::BranchInstruction &insn) {
void GenContext::emitFenceInstruction(const ir::FenceInstruction &insn) {}
void GenContext::emitLabelInstruction(const ir::LabelInstruction &insn) {
const ir::LabelIndex label = insn.getLabelIndex();
- const GenReg dst = GenReg::retype(GenReg::null(), GEN_TYPE_UW);
const GenReg src0 = this->genReg(blockIPReg);
const GenReg src1 = GenReg::immuw(label);
p->push();
p->curr.predicate = GEN_PREDICATE_NONE;
p->curr.flag = 0;
- p->CMP(dst, GEN_CONDITIONAL_LE, GenReg::retype(src0, GEN_TYPE_UW), src1);
+ p->CMP(GEN_CONDITIONAL_LE, GenReg::retype(src0, GEN_TYPE_UW), src1);
p->pop();
// If it is required, insert a JUMP to bypass the block
namespace gbe
{
+ //////////////////////////////////////////////////////////////////////////
+ // Helper functions used by the encoder to decide when to split an instruction
+ //////////////////////////////////////////////////////////////////////////
+ /*! True when the register has a non-zero horizontal stride and a byte
+  *  type (GEN_TYPE_UB or GEN_TYPE_B)
+  */
+ INLINE bool isVectorOfBytes(GenReg reg) {
+   const bool hasStride = reg.hstride != GEN_HORIZONTAL_STRIDE_0;
+   const bool isByteTyped = reg.type == GEN_TYPE_UB || reg.type == GEN_TYPE_B;
+   return hasStride && isByteTyped;
+ }
+
+ /*! A one-source instruction is split only when the current execution
+  *  width is 16 and either operand is a vector of bytes
+  */
+ INLINE bool needToSplitAlu1(GenEmitter *p, GenReg dst, GenReg src) {
+   const bool isSimd16 = p->curr.execWidth == 16;
+   return isSimd16 && (isVectorOfBytes(dst) || isVectorOfBytes(src));
+ }
+
+ /*! A two-source instruction is split only when the current execution
+  *  width is 16 and any of its three operands is a vector of bytes
+  */
+ INLINE bool needToSplitAlu2(GenEmitter *p, GenReg dst, GenReg src0, GenReg src1) {
+   const bool isSimd16 = p->curr.execWidth == 16;
+   const bool anyByteVector = isVectorOfBytes(dst) ||
+                              isVectorOfBytes(src0) ||
+                              isVectorOfBytes(src1);
+   return isSimd16 && anyByteVector;
+ }
+
+ /*! A compare is split only when the current execution width is 16 and
+  *  one of the sources is a vector of bytes or is typed D, UD or F
+  */
+ INLINE bool needToSplitCmp(GenEmitter *p, GenReg src0, GenReg src1) {
+   if (p->curr.execWidth != 16)
+     return false;
+   const bool anyByteVector = isVectorOfBytes(src0) || isVectorOfBytes(src1);
+   const bool src0IsDwordOrFloat = src0.type == GEN_TYPE_D ||
+                                   src0.type == GEN_TYPE_UD ||
+                                   src0.type == GEN_TYPE_F;
+   const bool src1IsDwordOrFloat = src1.type == GEN_TYPE_D ||
+                                   src1.type == GEN_TYPE_UD ||
+                                   src1.type == GEN_TYPE_F;
+   return anyByteVector || src0IsDwordOrFloat || src1IsDwordOrFloat;
+ }
+
+ //////////////////////////////////////////////////////////////////////////
+ // Gen Emitter encoding class
+ //////////////////////////////////////////////////////////////////////////
GenEmitter::GenEmitter(uint32_t simdWidth, uint32_t gen) :
insnNum(0), stateNum(0), gen(gen)
{
this->curr.flag = 0;
this->curr.subFlag = 0;
this->curr.predicate = GEN_PREDICATE_NORMAL;
- //this->curr.predicate = GEN_PREDICATE_NONE;
this->curr.inversePredicate = 0;
}
insn->bits1.da1.dest_horiz_stride = dest.hstride;
}
- static const int reg_type_size[8] = { 4, 4, 2, 2, 1, 1, 4 };
-
void GenEmitter::setSrc0(GenInstruction *insn, GenReg reg)
{
if (reg.file != GEN_ARCHITECTURE_REGISTER_FILE)
insn->bits3.da1.src1_abs = reg.absolute;
insn->bits3.da1.src1_negate = reg.negation;
- /* Only src1 can be immediate in two-argument instructions.
- */
assert(insn->bits1.da1.src0_reg_file != GEN_IMMEDIATE_VALUE);
if (reg.file == GEN_IMMEDIATE_VALUE)
return insn;
}
- INLINE GenInstruction *alu1(GenEmitter *p,
- uint32_t opcode,
- GenReg dest,
- GenReg src)
+ /*! Emit a one-source ALU instruction. When needToSplitAlu1 asks for it,
+  *  two GEN_WIDTH_8 instructions are emitted instead: one with quarter
+  *  control Q1 on the given registers and one with quarter control Q2 on
+  *  GenReg::Qn(reg, 2)
+  */
+ INLINE void alu1(GenEmitter *p,
+                  uint32_t opcode,
+                  GenReg dst,
+                  GenReg src)
{
- GenInstruction *insn = p->next(opcode);
- p->setHeader(insn);
- p->setDst(insn, dest);
- p->setSrc0(insn, src);
- return insn;
+ if (needToSplitAlu1(p, dst, src)) {
+   // Lower half: quarter Q1 works on the registers as given
+   GenInstruction *lower = p->next(opcode);
+   p->setHeader(lower);
+   lower->header.quarter_control = GEN_COMPRESSION_Q1;
+   lower->header.execution_size = GEN_WIDTH_8;
+   p->setDst(lower, dst);
+   p->setSrc0(lower, src);
+
+   // Upper half: quarter Q2 works on the second quarter of each register
+   GenInstruction *upper = p->next(opcode);
+   p->setHeader(upper);
+   upper->header.quarter_control = GEN_COMPRESSION_Q2;
+   upper->header.execution_size = GEN_WIDTH_8;
+   p->setDst(upper, GenReg::Qn(dst, 2));
+   p->setSrc0(upper, GenReg::Qn(src, 2));
+   return;
+ }
+
+ // No split required: a single instruction is enough
+ GenInstruction *single = p->next(opcode);
+ p->setHeader(single);
+ p->setDst(single, dst);
+ p->setSrc0(single, src);
}
- INLINE GenInstruction *alu2(GenEmitter *p,
- uint32_t opcode,
- GenReg dest,
- GenReg src0,
- GenReg src1)
+ /*! Emit a two-source ALU instruction. accWriteControl is copied into
+  *  header.acc_wr_control of every emitted instruction. When
+  *  needToSplitAlu2 asks for it, two GEN_WIDTH_8 instructions are emitted
+  *  instead: one with quarter control Q1 on the given registers and one
+  *  with quarter control Q2 on GenReg::Qn(reg, 2)
+  */
+ INLINE void alu2(GenEmitter *p,
+                  uint32_t opcode,
+                  GenReg dst,
+                  GenReg src0,
+                  GenReg src1,
+                  int accWriteControl = 0)
{
- GenInstruction *insn = p->next(opcode);
- p->setHeader(insn);
- p->setDst(insn, dest);
- p->setSrc0(insn, src0);
- p->setSrc1(insn, src1);
- return insn;
+ if (needToSplitAlu2(p, dst, src0, src1)) {
+   // Lower half: quarter Q1 works on the registers as given
+   GenInstruction *lower = p->next(opcode);
+   p->setHeader(lower);
+   lower->header.acc_wr_control = accWriteControl;
+   lower->header.quarter_control = GEN_COMPRESSION_Q1;
+   lower->header.execution_size = GEN_WIDTH_8;
+   p->setDst(lower, dst);
+   p->setSrc0(lower, src0);
+   p->setSrc1(lower, src1);
+
+   // Upper half: quarter Q2 works on the second quarter of each register
+   GenInstruction *upper = p->next(opcode);
+   p->setHeader(upper);
+   upper->header.acc_wr_control = accWriteControl;
+   upper->header.quarter_control = GEN_COMPRESSION_Q2;
+   upper->header.execution_size = GEN_WIDTH_8;
+   p->setDst(upper, GenReg::Qn(dst, 2));
+   p->setSrc0(upper, GenReg::Qn(src0, 2));
+   p->setSrc1(upper, GenReg::Qn(src1, 2));
+   return;
+ }
+
+ // No split required: a single instruction is enough
+ GenInstruction *single = p->next(opcode);
+ p->setHeader(single);
+ single->header.acc_wr_control = accWriteControl;
+ p->setDst(single, dst);
+ p->setSrc0(single, src0);
+ p->setSrc1(single, src1);
}
#if 0
#endif
+ /*! Define GenEmitter::OP as a forward to alu1 with GEN_OPCODE_##OP */
#define ALU1(OP) \
- GenInstruction *GenEmitter::OP(GenReg dest, GenReg src0) \
+ void GenEmitter::OP(GenReg dst, GenReg src) \
{ \
- return alu1(this, GEN_OPCODE_##OP, dest, src0); \
+ alu1(this, GEN_OPCODE_##OP, dst, src); \
}
+ /*! Define GenEmitter::OP as a forward to alu2 with GEN_OPCODE_##OP */
#define ALU2(OP) \
- GenInstruction *GenEmitter::OP(GenReg dest, GenReg src0, GenReg src1) \
+ void GenEmitter::OP(GenReg dst, GenReg srcA, GenReg srcB) \
{ \
- return alu2(this, GEN_OPCODE_##OP, dest, src0, src1); \
+ alu2(this, GEN_OPCODE_##OP, dst, srcA, srcB); \
}
+ /*! Define GenEmitter::OP as a forward to alu3 with GEN_OPCODE_##OP */
#define ALU3(OP) \
- GenInstruction *GenEmitter::OP(GenReg dest, GenReg src0, GenReg src1, GenReg src2) \
+ void GenEmitter::OP(GenReg dst, GenReg srcA, GenReg srcB, GenReg srcC) \
{ \
- return alu3(this, GEN_OPCODE_##OP, dest, src0, src1, src2); \
+ alu3(this, GEN_OPCODE_##OP, dst, srcA, srcB, srcC); \
}
ALU1(MOV)
ALU2(PLN)
// ALU3(MAD)
- GenInstruction *GenEmitter::MACH(GenReg dest, GenReg src0, GenReg src1)
- {
- GenInstruction *insn = alu2(this, GEN_OPCODE_MACH, dest, src0, src1);
- insn->header.acc_wr_control = 1;
- return insn;
+ /*! MACH is emitted through alu2, which sets header.acc_wr_control to the
+  *  value given here (1)
+  */
+ void GenEmitter::MACH(GenReg dest, GenReg src0, GenReg src1) {
+   const int accWriteControl = 1;
+   alu2(this, GEN_OPCODE_MACH, dest, src0, src1, accWriteControl);
}
- GenInstruction *GenEmitter::ADD(GenReg dest, GenReg src0, GenReg src1)
+ void GenEmitter::ADD(GenReg dest, GenReg src0, GenReg src1)
{
if (src0.type == GEN_TYPE_F ||
(src0.file == GEN_IMMEDIATE_VALUE &&
assert(src0.type != GEN_TYPE_D);
}
- return alu2(this, GEN_OPCODE_ADD, dest, src0, src1);
+ alu2(this, GEN_OPCODE_ADD, dest, src0, src1);
}
- GenInstruction *GenEmitter::MUL(GenReg dest, GenReg src0, GenReg src1)
+ void GenEmitter::MUL(GenReg dest, GenReg src0, GenReg src1)
{
/* 6.32.38: mul */
if (src0.type == GEN_TYPE_D ||
assert(src1.file != GEN_ARCHITECTURE_REGISTER_FILE ||
src1.nr != GEN_ARF_ACCUMULATOR);
- return alu2(this, GEN_OPCODE_MUL, dest, src0, src1);
+ alu2(this, GEN_OPCODE_MUL, dest, src0, src1);
}
this->setSrc1(&insn, GenReg::immd(jumpDistance));
}
- void GenEmitter::CMP(GenReg dest, uint32_t conditional, GenReg src0, GenReg src1)
+ /*! Emit a compare whose destination is always the null register. The
+  *  conditional is stored in header.destreg_or_condmod. When
+  *  needToSplitCmp asks for it, two GEN_WIDTH_8 compares are emitted
+  *  instead: one with quarter control Q1 on the given sources and one with
+  *  quarter control Q2 on GenReg::Qn(src, 2)
+  */
+ void GenEmitter::CMP(uint32_t conditional, GenReg src0, GenReg src1)
{
- GenInstruction *insn = this->next(GEN_OPCODE_CMP);
-
- insn->header.destreg_or_condmod = conditional;
- this->setHeader(insn);
- this->setDst(insn, dest);
- this->setSrc0(insn, src0);
- this->setSrc1(insn, src1);
+ if (needToSplitCmp(this, src0, src1)) {
+   // Lower half: quarter Q1 compares the sources as given
+   GenInstruction *lower = this->next(GEN_OPCODE_CMP);
+   this->setHeader(lower);
+   lower->header.quarter_control = GEN_COMPRESSION_Q1;
+   lower->header.execution_size = GEN_WIDTH_8;
+   lower->header.destreg_or_condmod = conditional;
+   this->setDst(lower, GenReg::null());
+   this->setSrc0(lower, src0);
+   this->setSrc1(lower, src1);
+
+   // Upper half: quarter Q2 compares the second quarter of each source
+   GenInstruction *upper = this->next(GEN_OPCODE_CMP);
+   this->setHeader(upper);
+   upper->header.quarter_control = GEN_COMPRESSION_Q2;
+   upper->header.execution_size = GEN_WIDTH_8;
+   upper->header.destreg_or_condmod = conditional;
+   this->setDst(upper, GenReg::null());
+   this->setSrc0(upper, GenReg::Qn(src0, 2));
+   this->setSrc1(upper, GenReg::Qn(src1, 2));
+   return;
+ }
+
+ // No split required: a single compare is enough
+ GenInstruction *single = this->next(GEN_OPCODE_CMP);
+ this->setHeader(single);
+ single->header.destreg_or_condmod = conditional;
+ this->setDst(single, GenReg::null());
+ this->setSrc0(single, src0);
+ this->setSrc1(single, src1);
}
void GenEmitter::WAIT(void)
return 2;
case GEN_TYPE_UB:
case GEN_TYPE_B:
- return 2;
+ return 1;
default:
+ assert(0);
return 0;
}
}
- /*! This is almost always called with a numeric constant argument, so make
- * things easy to evaluate at compile time:
- */
- INLINE uint32_t cvt(uint32_t val) {
- switch (val) {
- case 0: return 0;
- case 1: return 1;
- case 2: return 2;
- case 4: return 3;
- case 8: return 4;
- case 16: return 5;
- case 32: return 6;
- }
- return 0;
+ /*! Convert an encoded hstride value into a number of elements. Encodings
+  *  0 to 5 map to 0, 1, 2, 4, 8 and 16; anything else asserts and yields 0
+  */
+ INLINE uint32_t stride(uint32_t stride) {
+   static const uint32_t elemNum[] = { 0, 1, 2, 4, 8, 16 };
+   if (stride < sizeof(elemNum) / sizeof(elemNum[0]))
+     return elemNum[stride];
+   assert(0);
+   return 0;
}
/*! These are not hardware structs, just something useful to pass around */
this->pad0 = 0;
}
+ /*! Return the register holding the given quarter of reg. A register with
+  *  a zero horizontal stride is returned unchanged. Otherwise the register
+  *  is advanced by 8 * (quarter - 1) elements, each taking typeSize(type)
+  *  bytes times the horizontal stride, and nr / subnr are recomputed from
+  *  the resulting byte offset
+  */
+ static INLINE GenReg Qn(GenReg reg, uint32_t quarter) {
+   if (reg.hstride == GEN_HORIZONTAL_STRIDE_0) // scalar register
+     return reg;
+
+   const uint32_t elemSize = typeSize(reg.type);
+   const uint32_t elemStride = stride(reg.hstride);
+   const uint32_t skippedBytes = 8 * (quarter - 1) * elemSize * elemStride;
+   const uint32_t byteOffset = reg.nr * GEN_REG_SIZE + elemSize * reg.subnr + skippedBytes;
+   reg.nr = byteOffset / GEN_REG_SIZE;
+   reg.subnr = (byteOffset % GEN_REG_SIZE) / elemSize;
+   return reg;
+ }
+
static INLINE GenReg vec16(uint32_t file, uint32_t nr, uint32_t subnr) {
return GenReg(file,
nr,
static INLINE GenReg uw1(uint32_t file, uint32_t nr, uint32_t subnr) {
return suboffset(retype(vec1(file, nr, 0), GEN_TYPE_UW), subnr);
}
-#if 0
+
static INLINE GenReg ub16(uint32_t file, uint32_t nr, uint32_t subnr) {
return GenReg(file,
nr,
subnr,
GEN_TYPE_UB,
GEN_VERTICAL_STRIDE_16,
- GEN_WIDTH_16,
+ GEN_WIDTH_8,
GEN_HORIZONTAL_STRIDE_2);
}
static INLINE GenReg ub1(uint32_t file, uint32_t nr, uint32_t subnr) {
return suboffset(retype(vec1(file, nr, 0), GEN_TYPE_UB), subnr);
}
-#else
- static INLINE GenReg ub16(uint32_t file, uint32_t nr, uint32_t subnr) {
- return suboffset(retype(vec16(file, nr, 0), GEN_TYPE_UW), subnr);
- }
-
- static INLINE GenReg ub8(uint32_t file, uint32_t nr, uint32_t subnr) {
- return suboffset(retype(vec8(file, nr, 0), GEN_TYPE_UW), subnr);
- }
-
- static INLINE GenReg ub1(uint32_t file, uint32_t nr, uint32_t subnr) {
- return suboffset(retype(vec1(file, nr, 0), GEN_TYPE_UW), subnr);
- }
-#endif
static INLINE GenReg imm(uint32_t type) {
return GenReg(GEN_IMMEDIATE_VALUE,
return reg;
}
- static INLINE GenReg stride(GenReg reg, uint32_t vstride, uint32_t width, uint32_t hstride) {
- reg.vstride = cvt(vstride);
- reg.width = cvt(width) - 1;
- reg.hstride = cvt(hstride);
- return reg;
- }
-
static INLINE GenReg negate(GenReg reg) {
reg.negation ^= 1;
return reg;
// Encoding functions
////////////////////////////////////////////////////////////////////////
-#define ALU1(OP) GenInstruction *OP(GenReg dest, GenReg src0);
-#define ALU2(OP) GenInstruction *OP(GenReg dest, GenReg src0, GenReg src1);
-#define ALU3(OP) GenInstruction *OP(GenReg dest, GenReg src0, GenReg src1, GenReg src2);
+#define ALU1(OP) void OP(GenReg dest, GenReg src0);
+#define ALU2(OP) void OP(GenReg dest, GenReg src0, GenReg src1);
+#define ALU3(OP) void OP(GenReg dest, GenReg src0, GenReg src1, GenReg src2);
ALU1(MOV)
ALU1(RNDZ)
ALU1(RNDE)
/*! Jump indexed instruction */
void JMPI(GenReg src);
/*! Compare instructions */
- void CMP(GenReg dst, uint32_t conditional, GenReg src0, GenReg src1);
+ void CMP(uint32_t conditional, GenReg src0, GenReg src1);
/*! EOT is used to finish GPGPU threads */
void EOT(uint32_t msg_nr);
/*! No-op */