From: Zhigang Gong Date: Thu, 27 Mar 2014 08:27:18 +0000 (+0800) Subject: GBE: avoid using a temporary register at the CMP instruction. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d1290292be2c55fef20d85eb8d72bee0fc818360;p=contrib%2Fbeignet.git GBE: avoid using a temporary register at the CMP instruction. Using one SEL instruction, we can easily transfer a flag to a normal bool vector register with the correct mask. Signed-off-by: Zhigang Gong Reviewed-by: "Yang, Rong R" Reviewed-by: "Song, Ruiling" --- diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 93840ca..c9e0835 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -1077,7 +1077,6 @@ namespace gbe GenRegister tmp0 = ra->genReg(insn.dst(0)); GenRegister tmp1 = ra->genReg(insn.dst(1)); GenRegister tmp2 = ra->genReg(insn.dst(2)); - GenRegister dst = ra->genReg(insn.dst(3)); tmp0.type = (src0.type == GEN_TYPE_L) ? GEN_TYPE_D : GEN_TYPE_UD; tmp1.type = (src1.type == GEN_TYPE_L) ? GEN_TYPE_D : GEN_TYPE_UD; int flag = p->curr.flag, subFlag = p->curr.subFlag; @@ -1150,14 +1149,6 @@ namespace gbe p->curr.execWidth = 1; p->MOV(GenRegister::flag(flag, subFlag), f1); p->pop(); - p->push(); - p->curr.predicate = GEN_PREDICATE_NONE; - p->curr.noMask = 1; - p->MOV(dst, GenRegister::immd(0)); - p->curr.noMask = 0; - p->curr.predicate = GEN_PREDICATE_NORMAL; - p->MOV(dst, GenRegister::immd(-1)); - p->pop(); } void GenContext::emitI64SATADDInstruction(const SelectionInstruction &insn) { diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index a7baf72..a26aed7 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -486,7 +486,7 @@ namespace gbe /*! Shift a 64-bit integer */ void I64Shift(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, GenRegister tmp[7]); /*! 
Compare 64-bit integer */ - void I64CMP(uint32_t conditional, Reg src0, Reg src1, GenRegister tmp[3], Reg dst = GenRegister::null()); + void I64CMP(uint32_t conditional, Reg src0, Reg src1, GenRegister tmp[3]); /*! Saturated addition of 64-bit integer */ void I64SATADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]); /*! Saturated subtraction of 64-bit integer */ @@ -1244,13 +1244,12 @@ namespace gbe insn->src(2) = src2; } - void Selection::Opaque::I64CMP(uint32_t conditional, Reg src0, Reg src1, GenRegister tmp[3], Reg dst) { - SelectionInstruction *insn = this->appendInsn(SEL_OP_I64CMP, 4, 2); + void Selection::Opaque::I64CMP(uint32_t conditional, Reg src0, Reg src1, GenRegister tmp[3]) { + SelectionInstruction *insn = this->appendInsn(SEL_OP_I64CMP, 3, 2); insn->src(0) = src0; insn->src(1) = src1; for(int i=0; i<3; i++) insn->dst(i) = tmp[i]; - insn->dst(3) = dst; insn->extra.function = conditional; } @@ -2657,11 +2656,12 @@ namespace gbe const Type type = insn.getType(); const Register dst = insn.getDst(0); GenRegister tmpDst; - - if (type == TYPE_BOOL || type == TYPE_U16 || type == TYPE_S16) - tmpDst = sel.selReg(dst, TYPE_BOOL); + if(type == TYPE_S64 || type == TYPE_U64 || + type == TYPE_DOUBLE || type == TYPE_FLOAT || + type == TYPE_U32 || type == TYPE_S32 ) + tmpDst = GenRegister::nullud(); else - tmpDst = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_S32); + tmpDst = sel.selReg(dst, TYPE_BOOL); // Look for immediate values for the right source GenRegister src0, src1; @@ -2691,7 +2691,7 @@ namespace gbe GenRegister tmp[3]; for(int i=0; i<3; i++) tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); - sel.I64CMP(getGenCompare(opcode), src0, src1, tmp, tmpDst); + sel.I64CMP(getGenCompare(opcode), src0, src1, tmp); } else if(opcode == OP_ORD) { sel.push(); sel.CMP(GEN_CONDITIONAL_EQ, src0, src0, tmpDst); @@ -2702,8 +2702,18 @@ namespace gbe sel.CMP(getGenCompare(opcode), src0, src1, tmpDst); sel.pop(); - if (!(type == TYPE_BOOL || type == TYPE_U16 || type == TYPE_S16)) - 
sel.MOV(sel.selReg(dst, TYPE_U16), GenRegister::unpacked_uw((ir::Register)tmpDst.value.reg)); + if(type == TYPE_S64 || type == TYPE_U64 || + type == TYPE_DOUBLE || type == TYPE_FLOAT || + type == TYPE_U32 || type == TYPE_S32 ) { + sel.push(); + sel.curr.flag = 1; + sel.curr.subFlag = 1; + sel.curr.predicate = GEN_PREDICATE_NORMAL; + sel.SEL(sel.selReg(dst, TYPE_U16), + sel.selReg(ir::ocl::one, TYPE_U16), + sel.selReg(ir::ocl::zero, TYPE_U16)); + sel.pop(); + } return true; } };