From f16faff90f30a5497220a8f81053245d66ae81be Mon Sep 17 00:00:00 2001 From: Homer Hsing Date: Sun, 22 Sep 2013 14:18:03 +0800 Subject: [PATCH] add 64-bit version of "sub_sat" passed PIGLIT test cases: bin/cl-program-tester generated_tests/cl/builtin/int/builtin-long-sub_sat-1.0.generated.cl bin/cl-program-tester generated_tests/cl/builtin/int/builtin-ulong-sub_sat-1.0.generated.cl version 2: temp flag register is allocated by RA now version 3: subnr of temp flag reg is divided by typesize Signed-off-by: Homer Hsing Reviewed-by: "Song, Ruiling" --- backend/src/backend/gen_context.cpp | 48 +++++++++++++++++++++- backend/src/backend/gen_context.hpp | 1 + .../src/backend/gen_insn_gen7_schedule_info.hxx | 1 + backend/src/backend/gen_insn_selection.cpp | 21 ++++++++++ backend/src/backend/gen_insn_selection.hxx | 1 + backend/src/ocl_stdlib.tmpl.h | 9 +++- 6 files changed, 79 insertions(+), 2 deletions(-) diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 8bdefe1..2e34336 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -981,6 +981,52 @@ namespace gbe storeBottomHalf(dst, b); } + void GenContext::emitI64SATSUBInstruction(const SelectionInstruction &insn) { + GenRegister x = ra->genReg(insn.src(0)); + GenRegister y = ra->genReg(insn.src(1)); + GenRegister dst = ra->genReg(insn.dst(0)); + GenRegister a = ra->genReg(insn.dst(1)); + GenRegister b = ra->genReg(insn.dst(2)); + GenRegister c = ra->genReg(insn.dst(3)); + GenRegister d = ra->genReg(insn.dst(4)); + GenRegister e = ra->genReg(insn.dst(5)); + GenRegister flagReg = ra->genReg(insn.dst(6)); + loadTopHalf(a, x); + loadBottomHalf(b, x); + loadTopHalf(c, y); + loadBottomHalf(d, y); + if(dst.is_signed_int()) + p->SHR(e, a, GenRegister::immud(31)); + subWithBorrow(b, b, d); + subWithBorrow(a, a, d); + subWithBorrow(a, a, c); + p->ADD(c, c, d); + p->push(); + p->curr.predicate = GEN_PREDICATE_NONE; + p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr()); + if(! dst.is_signed_int()) { + p->CMP(GEN_CONDITIONAL_NZ, c, GenRegister::immud(0)); + p->curr.predicate = GEN_PREDICATE_NORMAL; + p->MOV(a, GenRegister::immud(0)); + p->MOV(b, GenRegister::immud(0)); + } else { + p->CMP(GEN_CONDITIONAL_EQ, e, GenRegister::immud(1)); + p->curr.predicate = GEN_PREDICATE_NORMAL; + p->CMP(GEN_CONDITIONAL_L, a, GenRegister::immud(0x80000000u)); + p->MOV(a, GenRegister::immud(0x80000000u)); + p->MOV(b, GenRegister::immud(0)); + p->curr.predicate = GEN_PREDICATE_NONE; + p->CMP(GEN_CONDITIONAL_EQ, e, GenRegister::immud(0)); + p->curr.predicate = GEN_PREDICATE_NORMAL; + p->CMP(GEN_CONDITIONAL_GE, a, GenRegister::immud(0x80000000u)); + p->MOV(a, GenRegister::immud(0x7FFFFFFFu)); + p->MOV(b, GenRegister::immud(0xFFFFFFFFu)); + } + p->pop(); + storeTopHalf(dst, a); + storeBottomHalf(dst, b); + } + void GenContext::loadTopHalf(GenRegister dest, GenRegister src) { int execWidth = p->curr.execWidth; src = src.top_half(); @@ -1068,11 +1114,11 @@ namespace gbe int execWidth = p->curr.execWidth; GenRegister acc0 = GenRegister::retype(GenRegister::acc(), GEN_TYPE_D); p->push(); - p->curr.predicate = GEN_PREDICATE_NONE; p->curr.execWidth = 8; p->SUBB(dest, src0, src1); p->MOV(src1, acc0); if (execWidth == 16) { + p->curr.quarterControl = 1; p->SUBB(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src0, 8), GenRegister::suboffset(src1, 8)); diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp index fa43571..e24c126 100644 --- a/backend/src/backend/gen_context.hpp +++ b/backend/src/backend/gen_context.hpp @@ -107,6 +107,7 @@ namespace gbe void emitI64ShiftInstruction(const SelectionInstruction &insn); void emitI64CompareInstruction(const SelectionInstruction &insn); void emitI64SATADDInstruction(const SelectionInstruction &insn); + void emitI64SATSUBInstruction(const SelectionInstruction &insn); void emitI64ToFloatInstruction(const SelectionInstruction &insn); void emitCompareInstruction(const SelectionInstruction &insn); void emitJumpInstruction(const SelectionInstruction &insn); diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx index 1ff10de..b94f235 100644 --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx @@ -36,3 +36,4 @@ DECL_GEN7_SCHEDULE(GetImageInfo, 20, 4, 2) DECL_GEN7_SCHEDULE(Atomic, 80, 1, 1) DECL_GEN7_SCHEDULE(I64MUL, 20, 4, 2) DECL_GEN7_SCHEDULE(I64SATADD, 20, 4, 2) +DECL_GEN7_SCHEDULE(I64SATSUB, 20, 4, 2) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index bc3c884..83b4f1b 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -485,6 +485,8 @@ namespace gbe void I64CMP(uint32_t conditional, Reg src0, Reg src1, GenRegister tmp[3]); /*! Saturated addition of 64-bit integer */ void I64SATADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]); + /*! Saturated subtraction of 64-bit integer */ + void I64SATSUB(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]); /*! Encode a barrier instruction */ void BARRIER(GenRegister src); /*! Encode a barrier instruction */ @@ -1094,6 +1096,15 @@ namespace gbe insn->dst(i + 1) = tmp[i]; } + void Selection::Opaque::I64SATSUB(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) { + SelectionInstruction *insn = this->appendInsn(SEL_OP_I64SATSUB, 7, 2); + insn->dst(0) = dst; + insn->src(0) = src0; + insn->src(1) = src1; + for(int i=0; i<6; i++) + insn->dst(i + 1) = tmp[i]; + } + void Selection::Opaque::CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[4]) { SelectionInstruction *insn = this->appendInsn(SEL_OP_CONVI64_TO_F, 5, 1); insn->dst(0) = dst; @@ -1684,6 +1695,16 @@ namespace gbe sel.ADD(dst, src0, GenRegister::negate(src1)); break; case OP_SUBSAT: + if (type == Type::TYPE_U64 || type == Type::TYPE_S64) { + GenRegister tmp[6]; + for(int i=0; i<5; i++) { + tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); + tmp[i].type = GEN_TYPE_UD; + } + tmp[5] = sel.selReg(sel.reg(FAMILY_BOOL)); + sel.I64SATSUB(dst, src0, src1, tmp); + break; + } sel.push(); sel.curr.saturate = GEN_MATH_SATURATE_SATURATE; sel.ADD(dst, src0, GenRegister::negate(src1)); diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx index 9814cc0..89b878d 100644 --- a/backend/src/backend/gen_insn_selection.hxx +++ b/backend/src/backend/gen_insn_selection.hxx @@ -30,6 +30,7 @@ DECL_SELECTION_IR(ADD, BinaryInstruction) DECL_SELECTION_IR(I64ADD, BinaryWithTempInstruction) DECL_SELECTION_IR(I64SATADD, I64SATADDInstruction) DECL_SELECTION_IR(I64SUB, BinaryWithTempInstruction) +DECL_SELECTION_IR(I64SATSUB, I64SATSUBInstruction) DECL_SELECTION_IR(MUL, BinaryInstruction) DECL_SELECTION_IR(I64MUL, I64MULInstruction) DECL_SELECTION_IR(ATOMIC, AtomicInstruction) diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 696259c..ff6f251 100644 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -199,7 +199,14 @@ INLINE_OVERLOADABLE long add_sat(long x, long y) { return ocl_sadd_sat(x, y); } OVERLOADABLE long ocl_ssub_sat(long x, long y); -INLINE_OVERLOADABLE long sub_sat(long x, long y) { return ocl_ssub_sat(x, y); } +INLINE_OVERLOADABLE long sub_sat(long x, long y) { + union {long l; uint i[2];} ux, uy; + ux.l = x; + uy.l = y; + if((ux.i[1] ^ uy.i[1]) & 0x80000000u) + return ocl_ssub_sat(x, y); + return x - y; +} #define UDEF(TYPE) \ OVERLOADABLE TYPE ocl_uadd_sat(TYPE x, TYPE y); \ OVERLOADABLE TYPE ocl_usub_sat(TYPE x, TYPE y); \ -- 2.7.4