From 39b3c0910fcd34f4589fcf71f7293ec141a4425f Mon Sep 17 00:00:00 2001 From: Benjamin Segovia Date: Fri, 11 May 2012 17:25:07 +0000 Subject: [PATCH] Simplified a lot the encoding and the back end. Added support for bytes --- backend/src/backend/gen_context.cpp | 57 +++-------- backend/src/backend/gen_eu.cpp | 192 +++++++++++++++++++++++++++--------- backend/src/backend/gen_eu.hpp | 74 +++++++------- 3 files changed, 192 insertions(+), 131 deletions(-) diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 9f8bd8b..54080c7 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -88,14 +88,8 @@ namespace gbe GenReg genReg = this->genReg(reg, type); if (this->isScalarReg(reg) == true) return genReg; - else { - const uint32_t elemSz = typeSize(genReg.file); - const uint32_t grfOffset = genReg.nr*GEN_REG_SIZE + elemSz*genReg.subnr; - const uint32_t nextOffset = grfOffset + 8*(quarter-1)*elemSz; - genReg.nr = nextOffset / GEN_REG_SIZE; - genReg.subnr = (nextOffset % GEN_REG_SIZE) / elemSz; - return genReg; - } + else + return GenReg::Qn(genReg, quarter); } // Per-lane block IPs are always pre-allocated and used for branches. We just @@ -296,7 +290,7 @@ namespace gbe p->push(); p->curr.flag = 0; p->curr.subFlag = 1; - p->CMP(GenReg::null(), GEN_CONDITIONAL_EQ, pred, GenReg::immuw(1)); + p->CMP(GEN_CONDITIONAL_EQ, pred, GenReg::immuw(1)); // Update the PcIPs p->MOV(ip, GenReg::immuw(uint16_t(dst))); @@ -311,7 +305,7 @@ namespace gbe p->curr.predicate = GEN_PREDICATE_NONE; p->curr.flag = 0; p->curr.subFlag = 1; - p->CMP(GenReg::null(), GEN_CONDITIONAL_G, ip, GenReg::immuw(nextLabel)); + p->CMP(GEN_CONDITIONAL_G, ip, GenReg::immuw(nextLabel)); // Branch to the jump target this->branchPos.insert(std::make_pair(&insn, p->insnNum)); @@ -364,7 +358,7 @@ namespace gbe p->push(); p->curr.flag = 0; p->curr.subFlag = 1; - p->CMP(GenReg::null(), GEN_CONDITIONAL_EQ, pred, GenReg::immuw(1)); + p->CMP(GEN_CONDITIONAL_EQ, pred, GenReg::immuw(1)); // Update the PcIPs p->MOV(ip, GenReg::immuw(uint16_t(dst))); @@ -444,8 +438,6 @@ namespace gbe using namespace ir; const Opcode opcode = insn.getOpcode(); const Type type = insn.getType(); - const RegisterFamily family = getFamily(type); - const uint32_t typeSize = familySize[family]; const uint32_t genCmp = getGenCompare(opcode); const GenReg dst = this->genReg(insn.getDst(0), TYPE_BOOL); const GenReg src0 = this->genReg(insn.getSrc(0), type); @@ -459,26 +451,11 @@ namespace gbe p->MOV(GenReg::flag(0,1), GenReg::flag(0,0)); p->pop(); - // Emit the compare instruction now. dwords require to push two SIMD8 - // instructions - GBE_ASSERT(typeSize == 2 || typeSize == 4); + // Emit the compare instruction itself p->push(); - if (this->simdWidth == 8 || typeSize == 2) { - p->curr.flag = 0; - p->curr.subFlag = 1; - p->CMP(GenReg::null(), genCmp, src0, src1); - } else if (this->simdWidth == 16) { - const GenReg nextSrc0 = this->genRegQn(insn.getSrc(0), 2, type); - const GenReg nextSrc1 = this->genRegQn(insn.getSrc(1), 2, type); - p->curr.flag = 0; - p->curr.subFlag = 1; - p->curr.execWidth = 8; - p->curr.quarterControl = GEN_COMPRESSION_Q1; - p->CMP(GenReg::null(), genCmp, src0, src1); - p->curr.quarterControl = GEN_COMPRESSION_Q2; - p->CMP(GenReg::null(), genCmp, nextSrc0, nextSrc1); - } else - NOT_SUPPORTED; + p->curr.flag = 0; + p->curr.subFlag = 1; + p->CMP(genCmp, src0, src1); p->pop(); // We emit a very unoptimized code where we store the resulting mask in a GRF @@ -503,30 +480,27 @@ namespace gbe // We need two steps here to make the conversion if (dstFamily != FAMILY_DWORD && srcFamily == FAMILY_DWORD) { GenReg unpacked; - const uint32_t vstride = simdWidth == 8 ? GEN_VERTICAL_STRIDE_8 : GEN_VERTICAL_STRIDE_16; - if (dstFamily == FAMILY_WORD) { + if (dstFamily == FAMILY_WORD) unpacked = GenReg(GEN_GENERAL_REGISTER_FILE, 112, 0, dstType == TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W, - vstride, + GEN_VERTICAL_STRIDE_16, GEN_WIDTH_8, GEN_HORIZONTAL_STRIDE_2); - } else { + else GBE_ASSERT(dstFamily == FAMILY_BYTE); unpacked = GenReg(GEN_GENERAL_REGISTER_FILE, 112, 0, dstType == TYPE_U8 ? GEN_TYPE_UB : GEN_TYPE_B, - vstride, + GEN_VERTICAL_STRIDE_32, GEN_WIDTH_8, GEN_HORIZONTAL_STRIDE_4); - } p->MOV(unpacked, src); p->MOV(dst, unpacked); - } else { + } else p->MOV(dst, src); - } } void GenContext::emitBranchInstruction(const ir::BranchInstruction &insn) { @@ -731,7 +705,6 @@ namespace gbe void GenContext::emitFenceInstruction(const ir::FenceInstruction &insn) {} void GenContext::emitLabelInstruction(const ir::LabelInstruction &insn) { const ir::LabelIndex label = insn.getLabelIndex(); - const GenReg dst = GenReg::retype(GenReg::null(), GEN_TYPE_UW); const GenReg src0 = this->genReg(blockIPReg); const GenReg src1 = GenReg::immuw(label); @@ -743,7 +716,7 @@ namespace gbe p->push(); p->curr.predicate = GEN_PREDICATE_NONE; p->curr.flag = 0; - p->CMP(dst, GEN_CONDITIONAL_LE, GenReg::retype(src0, GEN_TYPE_UW), src1); + p->CMP(GEN_CONDITIONAL_LE, GenReg::retype(src0, GEN_TYPE_UW), src1); p->pop(); // If it is required, insert a JUMP to bypass the block diff --git a/backend/src/backend/gen_eu.cpp b/backend/src/backend/gen_eu.cpp index 3353388..7a48e0f 100644 --- a/backend/src/backend/gen_eu.cpp +++ b/backend/src/backend/gen_eu.cpp @@ -28,6 +28,46 @@ namespace gbe { + ////////////////////////////////////////////////////////////////////////// + // Some helper functions to encode + ////////////////////////////////////////////////////////////////////////// + INLINE bool isVectorOfBytes(GenReg reg) { + if (reg.hstride != GEN_HORIZONTAL_STRIDE_0 && + (reg.type == GEN_TYPE_UB || reg.type == GEN_TYPE_B)) + return true; + else + return false; + } + + INLINE bool needToSplitAlu1(GenEmitter *p, GenReg dst, GenReg src) { + if (p->curr.execWidth != 16) return false; + if (isVectorOfBytes(dst) == true) return true; + if (isVectorOfBytes(src) == true) return true; + return false; + } + + INLINE bool needToSplitAlu2(GenEmitter *p, GenReg dst, GenReg src0, GenReg src1) { + if (p->curr.execWidth != 16) return false; + if (isVectorOfBytes(dst) == true) return true; + if (isVectorOfBytes(src0) == true) return true; + if (isVectorOfBytes(src1) == true) return true; + return false; + } + + INLINE bool needToSplitCmp(GenEmitter *p, GenReg src0, GenReg src1) { + if (p->curr.execWidth != 16) return false; + if (isVectorOfBytes(src0) == true) return true; + if (isVectorOfBytes(src1) == true) return true; + if (src0.type == GEN_TYPE_D || src0.type == GEN_TYPE_UD || src0.type == GEN_TYPE_F) + return true; + if (src1.type == GEN_TYPE_D || src1.type == GEN_TYPE_UD || src1.type == GEN_TYPE_F) + return true; + return false; + } + + ////////////////////////////////////////////////////////////////////////// + // Gen Emitter encoding class + ////////////////////////////////////////////////////////////////////////// GenEmitter::GenEmitter(uint32_t simdWidth, uint32_t gen) : insnNum(0), stateNum(0), gen(gen) { @@ -37,7 +77,6 @@ namespace gbe this->curr.flag = 0; this->curr.subFlag = 0; this->curr.predicate = GEN_PREDICATE_NORMAL; - //this->curr.predicate = GEN_PREDICATE_NONE; this->curr.inversePredicate = 0; } @@ -75,8 +114,6 @@ namespace gbe insn->bits1.da1.dest_horiz_stride = dest.hstride; } - static const int reg_type_size[8] = { 4, 4, 2, 2, 1, 1, 4 }; - void GenEmitter::setSrc0(GenInstruction *insn, GenReg reg) { if (reg.file != GEN_ARCHITECTURE_REGISTER_FILE) @@ -128,8 +165,6 @@ namespace gbe insn->bits3.da1.src1_abs = reg.absolute; insn->bits3.da1.src1_negate = reg.negation; - /* Only src1 can be immediate in two-argument instructions. - */ assert(insn->bits1.da1.src0_reg_file != GEN_IMMEDIATE_VALUE); if (reg.file == GEN_IMMEDIATE_VALUE) @@ -366,30 +401,71 @@ namespace gbe return insn; } - INLINE GenInstruction *alu1(GenEmitter *p, - uint32_t opcode, - GenReg dest, - GenReg src) + INLINE void alu1(GenEmitter *p, uint32_t opcode, GenReg dst, GenReg src) { - GenInstruction *insn = p->next(opcode); - p->setHeader(insn); - p->setDst(insn, dest); - p->setSrc0(insn, src); - return insn; + if (needToSplitAlu1(p, dst, src) == false) { + GenInstruction *insn = p->next(opcode); + p->setHeader(insn); + p->setDst(insn, dst); + p->setSrc0(insn, src); + } else { + GenInstruction *insnQ1, *insnQ2; + + // Instruction for the first quarter + insnQ1 = p->next(opcode); + p->setHeader(insnQ1); + insnQ1->header.quarter_control = GEN_COMPRESSION_Q1; + insnQ1->header.execution_size = GEN_WIDTH_8; + p->setDst(insnQ1, dst); + p->setSrc0(insnQ1, src); + + // Instruction for the second quarter + insnQ2 = p->next(opcode); + p->setHeader(insnQ2); + insnQ2->header.quarter_control = GEN_COMPRESSION_Q2; + insnQ2->header.execution_size = GEN_WIDTH_8; + p->setDst(insnQ2, GenReg::Qn(dst, 2)); + p->setSrc0(insnQ2, GenReg::Qn(src, 2)); + } } - INLINE GenInstruction *alu2(GenEmitter *p, - uint32_t opcode, - GenReg dest, - GenReg src0, - GenReg src1) + INLINE void alu2(GenEmitter *p, + uint32_t opcode, + GenReg dst, + GenReg src0, + GenReg src1, + int accWriteControl = 0) { - GenInstruction *insn = p->next(opcode); - p->setHeader(insn); - p->setDst(insn, dest); - p->setSrc0(insn, src0); - p->setSrc1(insn, src1); - return insn; + if (needToSplitAlu2(p, dst, src0, src1) == false) { + GenInstruction *insn = p->next(opcode); + p->setHeader(insn); + insn->header.acc_wr_control = accWriteControl; + p->setDst(insn, dst); + p->setSrc0(insn, src0); + p->setSrc1(insn, src1); + } else { + GenInstruction *insnQ1, *insnQ2; + + // Instruction for the first quarter + insnQ1 = p->next(opcode); + p->setHeader(insnQ1); + insnQ1->header.acc_wr_control = accWriteControl; + insnQ1->header.quarter_control = GEN_COMPRESSION_Q1; + insnQ1->header.execution_size = GEN_WIDTH_8; + p->setDst(insnQ1, dst); + p->setSrc0(insnQ1, src0); + p->setSrc1(insnQ1, src1); + + // Instruction for the second quarter + insnQ2 = p->next(opcode); + p->setHeader(insnQ2); + insnQ2->header.acc_wr_control = accWriteControl; + insnQ2->header.quarter_control = GEN_COMPRESSION_Q2; + insnQ2->header.execution_size = GEN_WIDTH_8; + p->setDst(insnQ2, GenReg::Qn(dst, 2)); + p->setSrc0(insnQ2, GenReg::Qn(src0, 2)); + p->setSrc1(insnQ2, GenReg::Qn(src1, 2)); + } } #if 0 @@ -463,21 +539,21 @@ namespace gbe #endif #define ALU1(OP) \ - GenInstruction *GenEmitter::OP(GenReg dest, GenReg src0) \ + void GenEmitter::OP(GenReg dest, GenReg src0) \ { \ - return alu1(this, GEN_OPCODE_##OP, dest, src0); \ + alu1(this, GEN_OPCODE_##OP, dest, src0); \ } #define ALU2(OP) \ - GenInstruction *GenEmitter::OP(GenReg dest, GenReg src0, GenReg src1) \ + void GenEmitter::OP(GenReg dest, GenReg src0, GenReg src1) \ { \ - return alu2(this, GEN_OPCODE_##OP, dest, src0, src1); \ + alu2(this, GEN_OPCODE_##OP, dest, src0, src1); \ } #define ALU3(OP) \ - GenInstruction *GenEmitter::OP(GenReg dest, GenReg src0, GenReg src1, GenReg src2) \ + void GenEmitter::OP(GenReg dest, GenReg src0, GenReg src1, GenReg src2) \ { \ - return alu3(this, GEN_OPCODE_##OP, dest, src0, src1, src2); \ + alu3(this, GEN_OPCODE_##OP, dest, src0, src1, src2); \ } ALU1(MOV) @@ -501,14 +577,11 @@ namespace gbe ALU2(PLN) // ALU3(MAD) - GenInstruction *GenEmitter::MACH(GenReg dest, GenReg src0, GenReg src1) - { - GenInstruction *insn = alu2(this, GEN_OPCODE_MACH, dest, src0, src1); - insn->header.acc_wr_control = 1; - return insn; + void GenEmitter::MACH(GenReg dest, GenReg src0, GenReg src1) { + alu2(this, GEN_OPCODE_MACH, dest, src0, src1, 1); } - GenInstruction *GenEmitter::ADD(GenReg dest, GenReg src0, GenReg src1) + void GenEmitter::ADD(GenReg dest, GenReg src0, GenReg src1) { if (src0.type == GEN_TYPE_F || (src0.file == GEN_IMMEDIATE_VALUE && @@ -524,10 +597,10 @@ namespace gbe assert(src0.type != GEN_TYPE_D); } - return alu2(this, GEN_OPCODE_ADD, dest, src0, src1); + alu2(this, GEN_OPCODE_ADD, dest, src0, src1); } - GenInstruction *GenEmitter::MUL(GenReg dest, GenReg src0, GenReg src1) + void GenEmitter::MUL(GenReg dest, GenReg src0, GenReg src1) { /* 6.32.38: mul */ if (src0.type == GEN_TYPE_D || @@ -556,7 +629,7 @@ namespace gbe assert(src1.file != GEN_ARCHITECTURE_REGISTER_FILE || src1.nr != GEN_ARF_ACCUMULATOR); - return alu2(this, GEN_OPCODE_MUL, dest, src0, src1); + alu2(this, GEN_OPCODE_MUL, dest, src0, src1); } @@ -581,15 +654,38 @@ namespace gbe this->setSrc1(&insn, GenReg::immd(jumpDistance)); } - void GenEmitter::CMP(GenReg dest, uint32_t conditional, GenReg src0, GenReg src1) + void GenEmitter::CMP(uint32_t conditional, GenReg src0, GenReg src1) { - GenInstruction *insn = this->next(GEN_OPCODE_CMP); - - insn->header.destreg_or_condmod = conditional; - this->setHeader(insn); - this->setDst(insn, dest); - this->setSrc0(insn, src0); - this->setSrc1(insn, src1); + if (needToSplitCmp(this, src0, src1) == false) { + GenInstruction *insn = this->next(GEN_OPCODE_CMP); + this->setHeader(insn); + insn->header.destreg_or_condmod = conditional; + this->setDst(insn, GenReg::null()); + this->setSrc0(insn, src0); + this->setSrc1(insn, src1); + } else { + GenInstruction *insnQ1, *insnQ2; + + // Instruction for the first quarter + insnQ1 = this->next(GEN_OPCODE_CMP); + this->setHeader(insnQ1); + insnQ1->header.quarter_control = GEN_COMPRESSION_Q1; + insnQ1->header.execution_size = GEN_WIDTH_8; + insnQ1->header.destreg_or_condmod = conditional; + this->setDst(insnQ1, GenReg::null()); + this->setSrc0(insnQ1, src0); + this->setSrc1(insnQ1, src1); + + // Instruction for the second quarter + insnQ2 = this->next(GEN_OPCODE_CMP); + this->setHeader(insnQ2); + insnQ2->header.quarter_control = GEN_COMPRESSION_Q2; + insnQ2->header.execution_size = GEN_WIDTH_8; + insnQ2->header.destreg_or_condmod = conditional; + this->setDst(insnQ2, GenReg::null()); + this->setSrc0(insnQ2, GenReg::Qn(src0, 2)); + this->setSrc1(insnQ2, GenReg::Qn(src1, 2)); + } } void GenEmitter::WAIT(void) diff --git a/backend/src/backend/gen_eu.hpp b/backend/src/backend/gen_eu.hpp index b7679b4..b61c6eb 100644 --- a/backend/src/backend/gen_eu.hpp +++ b/backend/src/backend/gen_eu.hpp @@ -53,26 +53,24 @@ namespace gbe return 2; case GEN_TYPE_UB: case GEN_TYPE_B: - return 2; + return 1; default: + assert(0); return 0; } } - /*! This is almost always called with a numeric constant argument, so make - * things easy to evaluate at compile time: - */ - INLINE uint32_t cvt(uint32_t val) { - switch (val) { - case 0: return 0; - case 1: return 1; - case 2: return 2; - case 4: return 3; - case 8: return 4; - case 16: return 5; - case 32: return 6; - } - return 0; + /*! Convert a hstride to a number of element */ + INLINE uint32_t stride(uint32_t stride) { + switch (stride) { + case 0: return 0; + case 1: return 1; + case 2: return 2; + case 3: return 4; + case 4: return 8; + case 5: return 16; + default: assert(0); return 0; + } } /*! These are not hardware structs, just something useful to pass around */ @@ -108,6 +106,20 @@ namespace gbe this->pad0 = 0; } + static INLINE GenReg Qn(GenReg reg, uint32_t quarter) { + if (reg.hstride == GEN_HORIZONTAL_STRIDE_0) // scalar register + return reg; + else { + const uint32_t typeSz = typeSize(reg.type); + const uint32_t horizontal = stride(reg.hstride); + const uint32_t grfOffset = reg.nr*GEN_REG_SIZE + typeSz*reg.subnr; + const uint32_t nextOffset = grfOffset + 8*(quarter-1)*typeSz*horizontal; + reg.nr = nextOffset / GEN_REG_SIZE; + reg.subnr = (nextOffset % GEN_REG_SIZE) / typeSz; + return reg; + } + } + static INLINE GenReg vec16(uint32_t file, uint32_t nr, uint32_t subnr) { return GenReg(file, nr, @@ -195,14 +207,14 @@ namespace gbe static INLINE GenReg uw1(uint32_t file, uint32_t nr, uint32_t subnr) { return suboffset(retype(vec1(file, nr, 0), GEN_TYPE_UW), subnr); } -#if 0 + static INLINE GenReg ub16(uint32_t file, uint32_t nr, uint32_t subnr) { return GenReg(file, nr, subnr, GEN_TYPE_UB, GEN_VERTICAL_STRIDE_16, - GEN_WIDTH_16, + GEN_WIDTH_8, GEN_HORIZONTAL_STRIDE_2); } @@ -219,19 +231,6 @@ namespace gbe static INLINE GenReg ub1(uint32_t file, uint32_t nr, uint32_t subnr) { return suboffset(retype(vec1(file, nr, 0), GEN_TYPE_UB), subnr); } -#else - static INLINE GenReg ub16(uint32_t file, uint32_t nr, uint32_t subnr) { - return suboffset(retype(vec16(file, nr, 0), GEN_TYPE_UW), subnr); - } - - static INLINE GenReg ub8(uint32_t file, uint32_t nr, uint32_t subnr) { - return suboffset(retype(vec8(file, nr, 0), GEN_TYPE_UW), subnr); - } - - static INLINE GenReg ub1(uint32_t file, uint32_t nr, uint32_t subnr) { - return suboffset(retype(vec1(file, nr, 0), GEN_TYPE_UW), subnr); - } -#endif static INLINE GenReg imm(uint32_t type) { return GenReg(GEN_IMMEDIATE_VALUE, @@ -401,13 +400,6 @@ namespace gbe return reg; } - static INLINE GenReg stride(GenReg reg, uint32_t vstride, uint32_t width, uint32_t hstride) { - reg.vstride = cvt(vstride); - reg.width = cvt(width) - 1; - reg.hstride = cvt(hstride); - return reg; - } - static INLINE GenReg negate(GenReg reg) { reg.negation ^= 1; return reg; @@ -490,9 +482,9 @@ namespace gbe // Encoding functions //////////////////////////////////////////////////////////////////////// -#define ALU1(OP) GenInstruction *OP(GenReg dest, GenReg src0); -#define ALU2(OP) GenInstruction *OP(GenReg dest, GenReg src0, GenReg src1); -#define ALU3(OP) GenInstruction *OP(GenReg dest, GenReg src0, GenReg src1, GenReg src2); +#define ALU1(OP) void OP(GenReg dest, GenReg src0); +#define ALU2(OP) void OP(GenReg dest, GenReg src0, GenReg src1); +#define ALU3(OP) void OP(GenReg dest, GenReg src0, GenReg src1, GenReg src2); ALU1(MOV) ALU1(RNDZ) ALU1(RNDE) @@ -523,7 +515,7 @@ namespace gbe /*! Jump indexed instruction */ void JMPI(GenReg src); /*! Compare instructions */ - void CMP(GenReg dst, uint32_t conditional, GenReg src0, GenReg src1); + void CMP(uint32_t conditional, GenReg src0, GenReg src1); /*! EOT is used to finish GPGPU threads */ void EOT(uint32_t msg_nr); /*! No-op */ -- 2.7.4