From c9cc192ca00c2cbb43bd72ce733732ce8c2cb874 Mon Sep 17 00:00:00 2001 From: Zhigang Gong Date: Fri, 16 May 2014 19:06:08 +0800 Subject: [PATCH] GBE: optimize CMP instruction encoding. This patch fixes the following two things. 1. Use a temporary register as dst register for the CMP instruction in the middle of a block. 2. Fix the switch flag for the CMP instruction at the beginning of each block. The compact instruction handling handles the CMP instruction directly and ignores the switch flag, which is incorrect. This patch could get about 2-3% performance gain for luxmark. Signed-off-by: Zhigang Gong Reviewed-by: "Song, Ruiling" --- backend/src/backend/gen_encoder.cpp | 9 +++- backend/src/backend/gen_insn_selection.cpp | 84 +++++++++++++++++------------- backend/src/backend/gen_insn_selection.hpp | 4 +- backend/src/backend/gen_reg_allocation.cpp | 33 ++++++++---- 4 files changed, 79 insertions(+), 51 deletions(-) diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp index 7078dcb..eadd94f 100644 --- a/backend/src/backend/gen_encoder.cpp +++ b/backend/src/backend/gen_encoder.cpp @@ -1151,13 +1151,14 @@ namespace gbe void GenEncoder::CMP(uint32_t conditional, GenRegister src0, GenRegister src1, GenRegister dst) { if (needToSplitCmp(this, src0, src1) == false) { - if(compactAlu2(this, GEN_OPCODE_CMP, dst, src0, src1, conditional, false)) { + if(!GenRegister::isNull(dst) && compactAlu2(this, GEN_OPCODE_CMP, dst, src0, src1, conditional, false)) { return; } GenNativeInstruction *insn = this->next(GEN_OPCODE_CMP); this->setHeader(insn); insn->header.destreg_or_condmod = conditional; - insn->header.thread_control = GEN_THREAD_SWITCH; + if (GenRegister::isNull(dst)) + insn->header.thread_control = GEN_THREAD_SWITCH; this->setDst(insn, dst); this->setSrc0(insn, src0); this->setSrc1(insn, src1); @@ -1167,6 +1168,8 @@ namespace gbe // Instruction for the first quarter insnQ1 = this->next(GEN_OPCODE_CMP); this->setHeader(insnQ1); + 
if (GenRegister::isNull(dst)) + insnQ1->header.thread_control = GEN_THREAD_SWITCH; insnQ1->header.quarter_control = GEN_COMPRESSION_Q1; insnQ1->header.execution_size = GEN_WIDTH_8; insnQ1->header.destreg_or_condmod = conditional; @@ -1177,6 +1180,8 @@ namespace gbe // Instruction for the second quarter insnQ2 = this->next(GEN_OPCODE_CMP); this->setHeader(insnQ2); + if (GenRegister::isNull(dst)) + insnQ2->header.thread_control = GEN_THREAD_SWITCH; insnQ2->header.quarter_control = GEN_COMPRESSION_Q2; insnQ2->header.execution_size = GEN_WIDTH_8; insnQ2->header.destreg_or_condmod = conditional; diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 2ab3aae..24ac17d 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -311,9 +311,9 @@ namespace gbe /*! Implement public class */ INLINE uint32_t getVectorNum(void) const { return this->vectorNum; } /*! Implement public class */ - INLINE ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID); + INLINE ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov); /*! Implement public class */ - INLINE ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID); + INLINE ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov); /*! spill a register (insert spill/unspill instructions) */ INLINE bool spillRegs(const SpilledRegs &spilledRegs, uint32_t registerPool); /*! 
should add per thread offset to the local memory address when load/store/atomic */ @@ -854,48 +854,56 @@ namespace gbe return true; } - ir::Register Selection::Opaque::replaceSrc(SelectionInstruction *insn, uint32_t regID) { + ir::Register Selection::Opaque::replaceSrc(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov) { SelectionBlock *block = insn->parent; const uint32_t simdWidth = insn->state.execWidth; ir::Register tmp; + GenRegister gr; // This will append the temporary register in the instruction block this->block = block; - tmp = this->reg(ir::FAMILY_DWORD); - - // Generate the MOV instruction and replace the register in the instruction - SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1); - mov->src(0) = GenRegister::retype(insn->src(regID), GEN_TYPE_F); - mov->state = GenInstructionState(simdWidth); - if (this->isScalarReg(insn->src(regID).reg())) - mov->state.noMask = 1; - insn->src(regID) = mov->dst(0) = GenRegister::fxgrf(simdWidth, tmp); - insn->prepend(*mov); + tmp = this->reg(ir::getFamily(type), simdWidth == 1); + gr = this->selReg(tmp, type); + if (needMov) { + // Generate the MOV instruction and replace the register in the instruction + SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1); + mov->src(0) = GenRegister::retype(insn->src(regID), gr.type); + mov->state = GenInstructionState(simdWidth); + if (this->isScalarReg(insn->src(regID).reg())) + mov->state.noMask = 1; + mov->dst(0) = gr; + insn->prepend(*mov); + } + insn->src(regID) = gr; return tmp; } - ir::Register Selection::Opaque::replaceDst(SelectionInstruction *insn, uint32_t regID) { + ir::Register Selection::Opaque::replaceDst(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov) { SelectionBlock *block = insn->parent; - uint32_t simdWidth = this->isScalarReg(insn->dst(regID).reg()) ? 
1 : insn->state.execWidth; + uint32_t simdWidth; + if (!GenRegister::isNull(insn->dst(regID))) + simdWidth = this->isScalarReg(insn->dst(regID).reg()) ? 1 : insn->state.execWidth; + else { + GBE_ASSERT(needMov == false); + simdWidth = insn->state.execWidth; + } ir::Register tmp; - ir::RegisterFamily f = file.get(insn->dst(regID).reg()).family; - int genType = f == ir::FAMILY_QWORD ? GEN_TYPE_DF : GEN_TYPE_F; GenRegister gr; - - // This will append the temporary register in the instruction block this->block = block; - tmp = this->reg(f); - + tmp = this->reg(ir::getFamily(type)); + gr = this->selReg(tmp, type); + if (needMov) { // Generate the MOV instruction and replace the register in the instruction - SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1); - mov->dst(0) = GenRegister::retype(insn->dst(regID), genType); - mov->state = GenInstructionState(simdWidth); - if (simdWidth == 1) - mov->state.noMask = 1; - gr = f == ir::FAMILY_QWORD ? GenRegister::dfxgrf(simdWidth, tmp) : GenRegister::fxgrf(simdWidth, tmp); - insn->dst(regID) = mov->src(0) = gr; - insn->append(*mov); + SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1); + mov->dst(0) = GenRegister::retype(insn->dst(regID), gr.type); + mov->state = GenInstructionState(simdWidth); + if (simdWidth == 1) + mov->state.noMask = 1; + mov->src(0) = gr; + insn->append(*mov); + } + insn->dst(regID) = gr; return tmp; } @@ -1640,12 +1648,12 @@ namespace gbe return this->opaque->getRegisterData(reg); } - ir::Register Selection::replaceSrc(SelectionInstruction *insn, uint32_t regID) { - return this->opaque->replaceSrc(insn, regID); + ir::Register Selection::replaceSrc(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov) { + return this->opaque->replaceSrc(insn, regID, type, needMov); } - ir::Register Selection::replaceDst(SelectionInstruction *insn, uint32_t regID) { - return this->opaque->replaceDst(insn, regID); + ir::Register Selection::replaceDst(SelectionInstruction *insn, uint32_t 
regID, ir::Type type, bool needMov) { + return this->opaque->replaceDst(insn, regID, type, needMov); } bool Selection::spillRegs(const SpilledRegs &spilledRegs, uint32_t registerPool) { return this->opaque->spillRegs(spilledRegs, registerPool); @@ -2918,7 +2926,7 @@ namespace gbe type == TYPE_DOUBLE || type == TYPE_FLOAT || type == TYPE_U32 || type == TYPE_S32 /*|| (!needStoreBool)*/) - tmpDst = GenRegister::nullud(); + tmpDst = GenRegister::retype(GenRegister::null(), GEN_TYPE_F); else tmpDst = sel.selReg(dst, TYPE_BOOL); @@ -2975,7 +2983,7 @@ namespace gbe // the dst to null register. And let the flag reg allocation // function to generate the flag grf on demand correctly latter. sel.curr.flagGen = needStoreBool; - tmpDst = GenRegister::nullud(); + tmpDst = GenRegister::retype(GenRegister::null(), GEN_TYPE_UW); } sel.CMP(getGenCompare(opcode), src0, src1, tmpDst); } @@ -3308,7 +3316,8 @@ namespace gbe sel.push(); sel.curr.noMask = 1; sel.curr.predicate = GEN_PREDICATE_NONE; - sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW)); + sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1, + GenRegister::retype(GenRegister::null(), GEN_TYPE_UW)); sel.pop(); if (sel.block->hasBarrier) { @@ -3321,7 +3330,8 @@ namespace gbe sel.MOV(GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL)); sel.curr.predicate = GEN_PREDICATE_NONE; sel.curr.noMask = 1; - sel.CMP(GEN_CONDITIONAL_EQ, GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL)); + sel.CMP(GEN_CONDITIONAL_EQ, GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL), + GenRegister::retype(GenRegister::null(), GEN_TYPE_UW)); if (simdWidth == 8) sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H; else if (simdWidth == 16) diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp index 2595fce..6ce2249 100644 --- 
a/backend/src/backend/gen_insn_selection.hpp +++ b/backend/src/backend/gen_insn_selection.hpp @@ -220,9 +220,9 @@ namespace gbe /*! Get the data for the given register */ ir::RegisterData getRegisterData(ir::Register reg) const; /*! Replace a source by the returned temporary register */ - ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID); + ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID, ir::Type type = ir::TYPE_FLOAT, bool needMov = true); /*! Replace a destination to the returned temporary register */ - ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID); + ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID, ir::Type type = ir::TYPE_FLOAT, bool needMov = true); /*! spill a register (insert spill/unspill instructions) */ bool spillRegs(const SpilledRegs &spilledRegs, uint32_t registerPool); /*! Indicate if a register is scalar or not */ diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp index 880a267..8349e9a 100644 --- a/backend/src/backend/gen_reg_allocation.cpp +++ b/backend/src/backend/gen_reg_allocation.cpp @@ -188,6 +188,21 @@ namespace gbe INLINE bool spillReg(ir::Register reg, bool isAllocated = false); INLINE bool vectorCanSpill(SelectionVector *vector); INLINE void allocateScratchForSpilled(); + + /*! replace specified source/dst register with temporary register and update interval */ + INLINE ir::Register replaceReg(Selection &sel, SelectionInstruction *insn, + uint32_t regID, bool isSrc, + ir::Type type = ir::TYPE_FLOAT, bool needMov = true) { + ir::Register reg; + if (isSrc) + reg = sel.replaceSrc(insn, regID, type, needMov); + else + reg = sel.replaceDst(insn, regID, type, needMov); + intervals.push_back(reg); + intervals[reg].minID = insn->ID; + intervals[reg].maxID = insn->ID; + return reg; + } /*! 
Use custom allocator */ GBE_CLASS(Opaque); }; @@ -301,15 +316,9 @@ namespace gbe // the MOVs else { ir::Register tmp; - if (vector->isSrc) - tmp = selection.replaceSrc(vector->insn, regID); - else - tmp = selection.replaceDst(vector->insn, regID); + tmp = this->replaceReg(selection, vector->insn, regID, vector->isSrc); const VectorLocation location = std::make_pair(vector, regID); this->vectorMap.insert(std::make_pair(tmp, location)); - intervals.push_back(tmp); - intervals[tmp].minID = vector->insn->ID; - intervals[tmp].maxID = vector->insn->ID; } } } @@ -590,12 +599,16 @@ namespace gbe if (insn.state.predicate != GEN_PREDICATE_NONE) validateFlag(selection, insn); } - // This is a CMP for a pure flag booleans, we don't need to write result to // the grf. And latter, we will not allocate grf for it. if (insn.opcode == SEL_OP_CMP && - flagBooleans.contains((ir::Register)(insn.dst(0).value.reg))) - insn.dst(0) = GenRegister::null(); + (flagBooleans.contains(insn.dst(0).reg()) || + GenRegister::isNull(insn.dst(0)))) { + // set a temporary register to avoid switch in this block. + bool isSrc = false; + bool needMov = false; + this->replaceReg(selection, &insn, 0, isSrc, ir::TYPE_FLOAT, needMov); + } // If the instruction requires to generate (CMP for long/int/float..) // the flag value to the register, and it's not a pure flag boolean, // we need to use SEL instruction to generate the flag value to the UW8 -- 2.7.4