From: Homer Hsing Date: Sun, 22 Sep 2013 06:18:02 +0000 (+0800) Subject: support 64-bit version "add_sat" X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=4df6ea60000ba00227d8f30705311c3d732549f4;p=contrib%2Fbeignet.git support 64-bit version "add_sat" tested by piglit: piglit/bin/cl-program-tester generated_tests/cl/builtin/int/builtin-long-add_sat-1.0.generated.cl piglit/bin/cl-program-tester generated_tests/cl/builtin/int/builtin-ulong-add_sat-1.0.generated.cl version 2: temp flag register is now allocated by RA version 3: divide subnr of temp flag reg by typesize Signed-off-by: Homer Hsing Reviewed-by: "Song, Ruiling" --- diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 3167a9b..8bdefe1 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -935,6 +935,52 @@ namespace gbe p->pop(); } + void GenContext::emitI64SATADDInstruction(const SelectionInstruction &insn) { + GenRegister x = ra->genReg(insn.src(0)); + GenRegister y = ra->genReg(insn.src(1)); + GenRegister dst = ra->genReg(insn.dst(0)); + GenRegister a = ra->genReg(insn.dst(1)); + GenRegister b = ra->genReg(insn.dst(2)); + GenRegister c = ra->genReg(insn.dst(3)); + GenRegister d = ra->genReg(insn.dst(4)); + GenRegister e = ra->genReg(insn.dst(5)); + GenRegister flagReg = ra->genReg(insn.dst(6)); + loadTopHalf(a, x); + loadBottomHalf(b, x); + loadTopHalf(c, y); + loadBottomHalf(d, y); + if(dst.is_signed_int()) + p->SHR(e, a, GenRegister::immud(31)); + addWithCarry(b, b, d); + addWithCarry(a, a, d); + addWithCarry(a, a, c); + p->ADD(c, c, d); + p->push(); + p->curr.predicate = GEN_PREDICATE_NONE; + p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr()); + if(! dst.is_signed_int()) { + p->CMP(GEN_CONDITIONAL_NZ, c, GenRegister::immud(0)); + p->curr.predicate = GEN_PREDICATE_NORMAL; + p->MOV(a, GenRegister::immud(0xFFFFFFFFu)); + p->MOV(b, GenRegister::immud(0xFFFFFFFFu)); + } else { + p->CMP(GEN_CONDITIONAL_EQ, e, GenRegister::immud(1)); + p->curr.predicate = GEN_PREDICATE_NORMAL; + p->CMP(GEN_CONDITIONAL_L, a, GenRegister::immud(0x80000000u)); + p->MOV(a, GenRegister::immud(0x80000000u)); + p->MOV(b, GenRegister::immud(0)); + p->curr.predicate = GEN_PREDICATE_NONE; + p->CMP(GEN_CONDITIONAL_EQ, e, GenRegister::immud(0)); + p->curr.predicate = GEN_PREDICATE_NORMAL; + p->CMP(GEN_CONDITIONAL_GE, a, GenRegister::immud(0x80000000u)); + p->MOV(a, GenRegister::immud(0x7FFFFFFFu)); + p->MOV(b, GenRegister::immud(0xFFFFFFFFu)); + } + p->pop(); + storeTopHalf(dst, a); + storeBottomHalf(dst, b); + } + void GenContext::loadTopHalf(GenRegister dest, GenRegister src) { int execWidth = p->curr.execWidth; src = src.top_half(); diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp index c9b74eb..fa43571 100644 --- a/backend/src/backend/gen_context.hpp +++ b/backend/src/backend/gen_context.hpp @@ -106,6 +106,7 @@ namespace gbe void emitI64RHADDInstruction(const SelectionInstruction &insn); void emitI64ShiftInstruction(const SelectionInstruction &insn); void emitI64CompareInstruction(const SelectionInstruction &insn); + void emitI64SATADDInstruction(const SelectionInstruction &insn); void emitI64ToFloatInstruction(const SelectionInstruction &insn); void emitCompareInstruction(const SelectionInstruction &insn); void emitJumpInstruction(const SelectionInstruction &insn); diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx index 46537c6..1ff10de 100644 --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx @@ -35,3 +35,4 @@ DECL_GEN7_SCHEDULE(UnSpillReg, 80, 1, 1) DECL_GEN7_SCHEDULE(GetImageInfo, 20, 4, 2) DECL_GEN7_SCHEDULE(Atomic, 80, 1, 1) DECL_GEN7_SCHEDULE(I64MUL, 20, 4, 2) +DECL_GEN7_SCHEDULE(I64SATADD, 20, 4, 2) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 0691a58..bc3c884 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -483,6 +483,8 @@ namespace gbe void I64Shift(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, GenRegister tmp[7]); /*! Compare 64-bit integer */ void I64CMP(uint32_t conditional, Reg src0, Reg src1, GenRegister tmp[3]); + /*! Saturated addition of 64-bit integer */ + void I64SATADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]); /*! Encode a barrier instruction */ void BARRIER(GenRegister src); /*! Encode a barrier instruction */ @@ -1083,6 +1085,15 @@ namespace gbe insn->extra.function = conditional; } + void Selection::Opaque::I64SATADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) { + SelectionInstruction *insn = this->appendInsn(SEL_OP_I64SATADD, 7, 2); + insn->dst(0) = dst; + insn->src(0) = src0; + insn->src(1) = src1; + for(int i=0; i<6; i++) + insn->dst(i + 1) = tmp[i]; + } + void Selection::Opaque::CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[4]) { SelectionInstruction *insn = this->appendInsn(SEL_OP_CONVI64_TO_F, 5, 1); insn->dst(0) = dst; @@ -1632,6 +1643,16 @@ namespace gbe sel.ADD(dst, src0, src1); break; case OP_ADDSAT: + if (type == Type::TYPE_U64 || type == Type::TYPE_S64) { + GenRegister tmp[6]; + for(int i=0; i<5; i++) { + tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); + tmp[i].type = GEN_TYPE_UD; + } + tmp[5] = sel.selReg(sel.reg(FAMILY_BOOL)); + sel.I64SATADD(dst, src0, src1, tmp); + break; + } sel.push(); sel.curr.saturate = GEN_MATH_SATURATE_SATURATE; sel.ADD(dst, src0, src1); diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx index 63ad810..9814cc0 100644 --- a/backend/src/backend/gen_insn_selection.hxx +++ b/backend/src/backend/gen_insn_selection.hxx @@ -28,6 +28,7 @@ DECL_SELECTION_IR(I64SHL, I64ShiftInstruction) DECL_SELECTION_IR(I64ASR, I64ShiftInstruction) DECL_SELECTION_IR(ADD, BinaryInstruction) DECL_SELECTION_IR(I64ADD, BinaryWithTempInstruction) +DECL_SELECTION_IR(I64SATADD, I64SATADDInstruction) DECL_SELECTION_IR(I64SUB, BinaryWithTempInstruction) DECL_SELECTION_IR(MUL, BinaryInstruction) DECL_SELECTION_IR(I64MUL, I64MULInstruction) diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index fa1ba37..696259c 100644 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -182,7 +182,6 @@ INLINE_OVERLOADABLE TYPE add_sat(TYPE x, TYPE y) { return ocl_sadd_sat(x, y); } INLINE_OVERLOADABLE TYPE sub_sat(TYPE x, TYPE y) { return ocl_ssub_sat(x, y); } SDEF(char); SDEF(short); -SDEF(long); #undef SDEF OVERLOADABLE int ocl_sadd_sat(int x, int y); INLINE_OVERLOADABLE int add_sat(int x, int y) { return ocl_sadd_sat(x, y); } @@ -190,6 +189,17 @@ OVERLOADABLE int ocl_ssub_sat(int x, int y); INLINE_OVERLOADABLE int sub_sat(int x, int y) { return (y == 0x80000000u) ? (x & 0x7FFFFFFF) : ocl_ssub_sat(x, y); } +OVERLOADABLE long ocl_sadd_sat(long x, long y); +INLINE_OVERLOADABLE long add_sat(long x, long y) { + union {long l; uint i[2];} ux, uy; + ux.l = x; + uy.l = y; + if((ux.i[1] ^ uy.i[1]) & 0x80000000u) + return x + y; + return ocl_sadd_sat(x, y); +} +OVERLOADABLE long ocl_ssub_sat(long x, long y); +INLINE_OVERLOADABLE long sub_sat(long x, long y) { return ocl_ssub_sat(x, y); } #define UDEF(TYPE) \ OVERLOADABLE TYPE ocl_uadd_sat(TYPE x, TYPE y); \ OVERLOADABLE TYPE ocl_usub_sat(TYPE x, TYPE y); \ @@ -201,7 +211,6 @@ UDEF(uint); UDEF(ulong); #undef UDEF - uchar INLINE_OVERLOADABLE convert_uchar_sat(float x) { return add_sat((uchar)x, (uchar)0); }