From: Homer Hsing Date: Sun, 22 Sep 2013 06:18:01 +0000 (+0800) Subject: add 64-bit version of "mad_sat" X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2f5fc02430fa39f6990b0f33b42ae9de8b43b16b;p=contrib%2Fbeignet.git add 64-bit version of "mad_sat" tested by piglit: piglit/framework/../bin/cl-program-tester generated_tests/cl/builtin/int/builtin-long-mad_sat-1.0.generated.cl piglit/framework/../bin/cl-program-tester generated_tests/cl/builtin/int/builtin-ulong-mad_sat-1.0.generated.cl version 2: temp flag register is allocated by RA version 3: divide subnr of flag register by typesize Signed-off-by: Homer Hsing Reviewed-by: "Song, Ruiling" --- diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index ff08a8b..3167a9b 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -523,6 +523,133 @@ namespace gbe storeBottomHalf(dest, f); } + void GenContext::emitI64MADSATInstruction(const SelectionInstruction &insn) { + GenRegister dest = ra->genReg(insn.dst(0)); + GenRegister x = ra->genReg(insn.src(0)); + GenRegister y = ra->genReg(insn.src(1)); + GenRegister z = ra->genReg(insn.src(2)); + GenRegister a = ra->genReg(insn.dst(1)); + GenRegister b = ra->genReg(insn.dst(2)); + GenRegister c = ra->genReg(insn.dst(3)); + GenRegister d = ra->genReg(insn.dst(4)); + GenRegister e = ra->genReg(insn.dst(5)); + GenRegister f = ra->genReg(insn.dst(6)); + GenRegister g = ra->genReg(insn.dst(7)); + GenRegister h = ra->genReg(insn.dst(8)); + GenRegister i = ra->genReg(insn.dst(9)); + GenRegister flagReg = ra->genReg(insn.dst(10)); + GenRegister zero = GenRegister::immud(0), one = GenRegister::immud(1); + loadTopHalf(a, x); + loadBottomHalf(b, x); + loadTopHalf(c, y); + loadBottomHalf(d, y); + if(x.type == GEN_TYPE_UL) { + I64FullMult(e, f, g, h, a, b, c, d); + loadTopHalf(c, z); + loadBottomHalf(d, z); + addWithCarry(h, h, d); + addWithCarry(g, g, d); + addWithCarry(f, f, d); + p->ADD(e, e, d); + addWithCarry(g, g, c); + addWithCarry(f, f, c); + p->ADD(e, e, c); + p->OR(a, e, f); + p->push(); + p->curr.predicate = GEN_PREDICATE_NONE; + p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr()); + p->CMP(GEN_CONDITIONAL_NZ, a, zero); + p->curr.predicate = GEN_PREDICATE_NORMAL; + p->MOV(g, GenRegister::immd(-1)); + p->MOV(h, GenRegister::immd(-1)); + p->pop(); + } else { + I64ABS(e, a, b, i, flagReg); + I64ABS(f, c, d, i, flagReg); + p->XOR(i, e, f); + I64FullMult(e, f, g, h, a, b, c, d); + p->push(); + p->curr.predicate = GEN_PREDICATE_NONE; + p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr()); + p->CMP(GEN_CONDITIONAL_NZ, i, zero); + p->curr.predicate = GEN_PREDICATE_NORMAL; + p->NOT(e, e); + p->NOT(f, f); + p->NOT(g, g); + p->NOT(h, h); + p->MOV(i, one); + addWithCarry(h, h, i); + addWithCarry(g, g, i); + addWithCarry(f, f, i); + p->ADD(e, e, i); + p->pop(); + loadTopHalf(c, z); + loadBottomHalf(d, z); + p->ASR(GenRegister::retype(b, GEN_TYPE_D), GenRegister::retype(c, GEN_TYPE_D), GenRegister::immd(31)); + p->MOV(a, b); + addWithCarry(h, h, d); + addWithCarry(g, g, d); + addWithCarry(f, f, d); + p->ADD(e, e, d); + addWithCarry(g, g, c); + addWithCarry(f, f, c); + p->ADD(e, e, c); + addWithCarry(f, f, b); + p->ADD(e, e, b); + p->ADD(e, e, a); + p->MOV(b, zero); + p->push(); + p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr()); + p->curr.predicate = GEN_PREDICATE_NONE; + p->CMP(GEN_CONDITIONAL_NZ, e, zero); + p->curr.predicate = GEN_PREDICATE_NORMAL; + p->MOV(b, one); + p->curr.predicate = GEN_PREDICATE_NONE; + p->CMP(GEN_CONDITIONAL_NZ, f, zero); + p->curr.predicate = GEN_PREDICATE_NORMAL; + p->MOV(b, one); + p->curr.predicate = GEN_PREDICATE_NONE; + p->CMP(GEN_CONDITIONAL_G, g, GenRegister::immud(0x7FFFFFFF)); + p->curr.predicate = GEN_PREDICATE_NORMAL; + p->MOV(b, one); + p->curr.predicate = GEN_PREDICATE_NONE; + p->SHR(a, e, GenRegister::immud(31)); + p->CMP(GEN_CONDITIONAL_NZ, a, zero); + p->curr.predicate = GEN_PREDICATE_NORMAL; + p->MOV(b, zero); + p->curr.predicate = GEN_PREDICATE_NONE; + p->CMP(GEN_CONDITIONAL_NZ, b, zero); + p->curr.predicate = GEN_PREDICATE_NORMAL; + p->MOV(g, GenRegister::immud(0x7FFFFFFF)); + p->MOV(h, GenRegister::immud(0xFFFFFFFFu)); + p->curr.predicate = GEN_PREDICATE_NONE; + p->MOV(b, zero); + p->CMP(GEN_CONDITIONAL_NEQ, e, GenRegister::immud(0xFFFFFFFFu)); + p->curr.predicate = GEN_PREDICATE_NORMAL; + p->MOV(b, one); + p->curr.predicate = GEN_PREDICATE_NONE; + p->CMP(GEN_CONDITIONAL_NEQ, f, GenRegister::immud(0xFFFFFFFFu)); + p->curr.predicate = GEN_PREDICATE_NORMAL; + p->MOV(b, one); + p->curr.predicate = GEN_PREDICATE_NONE; + p->CMP(GEN_CONDITIONAL_LE, g, GenRegister::immud(0x7FFFFFFF)); + p->curr.predicate = GEN_PREDICATE_NORMAL; + p->MOV(b, one); + p->curr.predicate = GEN_PREDICATE_NONE; + p->CMP(GEN_CONDITIONAL_Z, a, zero); + p->curr.predicate = GEN_PREDICATE_NORMAL; + p->MOV(b, zero); + p->curr.predicate = GEN_PREDICATE_NONE; + p->CMP(GEN_CONDITIONAL_NZ, b, zero); + p->curr.predicate = GEN_PREDICATE_NORMAL; + p->MOV(g, GenRegister::immud(0x80000000u)); + p->MOV(h, zero); + p->pop(); + } + storeTopHalf(dest, g); + storeBottomHalf(dest, h); + } + void GenContext::emitI64HADDInstruction(const SelectionInstruction &insn) { GenRegister dest = ra->genReg(insn.dst(0)); GenRegister x = ra->genReg(insn.src(0)); diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp index 6fe71c5..c9b74eb 100644 --- a/backend/src/backend/gen_context.hpp +++ b/backend/src/backend/gen_context.hpp @@ -101,6 +101,7 @@ namespace gbe void emitBinaryWithTempInstruction(const SelectionInstruction &insn); void emitTernaryInstruction(const SelectionInstruction &insn); void emitI64MULHIInstruction(const SelectionInstruction &insn); + void emitI64MADSATInstruction(const SelectionInstruction &insn); void emitI64HADDInstruction(const SelectionInstruction &insn); void emitI64RHADDInstruction(const SelectionInstruction &insn); void emitI64ShiftInstruction(const SelectionInstruction &insn); diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx index a420cfc..46537c6 100644 --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx @@ -10,6 +10,7 @@ DECL_GEN7_SCHEDULE(I64HADD, 20, 4, 2) DECL_GEN7_SCHEDULE(I64RHADD, 20, 4, 2) DECL_GEN7_SCHEDULE(I64ToFloat, 20, 4, 2) DECL_GEN7_SCHEDULE(I64MULHI, 20, 4, 2) +DECL_GEN7_SCHEDULE(I64MADSAT, 20, 4, 2) DECL_GEN7_SCHEDULE(Compare, 20, 4, 2) DECL_GEN7_SCHEDULE(I64Compare, 20, 4, 2) DECL_GEN7_SCHEDULE(Jump, 14, 1, 1) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 2791a0e..0691a58 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -471,6 +471,8 @@ namespace gbe #undef I64Shift /*! Convert 64-bit integer to 32-bit float */ void CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[4]); + /*! Saturated 64bit x*y + z */ + void I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister tmp[10]); /*! High 64bit of x*y */ void I64_MUL_HI(Reg dst, Reg src0, Reg src1, GenRegister tmp[10]); /*! (x+y)>>1 without mod. overflow */ @@ -1089,6 +1091,16 @@ namespace gbe insn->dst(i + 1) = tmp[i]; } + void Selection::Opaque::I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister tmp[10]) { + SelectionInstruction *insn = this->appendInsn(SEL_OP_I64MADSAT, 11, 3); + insn->dst(0) = dst; + insn->src(0) = src0; + insn->src(1) = src1; + insn->src(2) = src2; + for(int i = 0; i < 10; i ++) + insn->dst(i + 1) = tmp[i]; + } + void Selection::Opaque::I64_MUL_HI(Reg dst, Reg src0, Reg src1, GenRegister tmp[10]) { SelectionInstruction *insn = this->appendInsn(SEL_OP_I64_MUL_HI, 11, 2); insn->dst(0) = dst; @@ -2586,6 +2598,36 @@ namespace gbe } }; + DECL_PATTERN(TernaryInstruction) + { + INLINE bool emitOne(Selection::Opaque &sel, const ir::TernaryInstruction &insn) const { + using namespace ir; + const Type type = insn.getType(); + const GenRegister dst = sel.selReg(insn.getDst(0), type), + src0 = sel.selReg(insn.getSrc(0), type), + src1 = sel.selReg(insn.getSrc(1), type), + src2 = sel.selReg(insn.getSrc(2), type); + switch(insn.getOpcode()) { + case OP_I64MADSAT: + { + GenRegister tmp[10]; + for(int i=0; i<9; i++) { + tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); + tmp[i].type = GEN_TYPE_UD; + } + tmp[9] = sel.selReg(sel.reg(FAMILY_BOOL)); + sel.I64MADSAT(dst, src0, src1, src2, tmp); + break; + } + default: + NOT_IMPLEMENTED; + } + return true; + } + + DECL_CTOR(TernaryInstruction, 1, 1); + }; + /*! Label instruction pattern */ DECL_PATTERN(LabelInstruction) { @@ -2876,6 +2918,7 @@ namespace gbe this->insert(); this->insert(); this->insert(); + this->insert(); this->insert(); this->insert(); this->insert(); diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx index 86d1756..63ad810 100644 --- a/backend/src/backend/gen_insn_selection.hxx +++ b/backend/src/backend/gen_insn_selection.hxx @@ -71,3 +71,4 @@ DECL_SELECTION_IR(UPSAMPLE_LONG, BinaryInstruction) DECL_SELECTION_IR(CONVI_TO_I64, UnaryWithTempInstruction) DECL_SELECTION_IR(CONVI64_TO_I, UnaryInstruction) DECL_SELECTION_IR(CONVI64_TO_F, I64ToFloatInstruction) +DECL_SELECTION_IR(I64MADSAT, I64MADSATInstruction) diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp index c286f1d..a7337e6 100644 --- a/backend/src/ir/context.hpp +++ b/backend/src/ir/context.hpp @@ -142,6 +142,7 @@ namespace ir { this->NAME(type, dst, index); \ } DECL_THREE_SRC_INSN(SEL); + DECL_THREE_SRC_INSN(I64MADSAT); #undef DECL_THREE_SRC_INSN /*! For all unary functions */ diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index 8f0ac1e..b3b9e10 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -173,6 +173,30 @@ namespace ir { } }; + class ALIGNED_INSTRUCTION TernaryInstruction : + public BasePolicy, + public NDstPolicy, + public TupleSrcPolicy + { + public: + TernaryInstruction(Opcode opcode, + Type type, + Register dst, + Tuple src) { + this->opcode = opcode; + this->type = type; + this->dst[0] = dst; + this->src = src; + } + Type getType(void) const { return type; } + bool wellFormed(const Function &fn, std::string &whyNot) const; + INLINE void out(std::ostream &out, const Function &fn) const; + Type type; + Register dst[1]; + Tuple src; + static const uint32_t srcNum = 3; + }; + /*! Three sources mean we need a tuple to encode it */ class ALIGNED_INSTRUCTION SelectInstruction : public BasePolicy, @@ -788,6 +812,25 @@ namespace ir { return true; } + INLINE bool TernaryInstruction::wellFormed(const Function &fn, std::string &whyNot) const + { + const RegisterFamily family = getFamily(this->type); + if (UNLIKELY(checkSpecialRegForWrite(dst[0], fn, whyNot) == false)) + return false; + if (UNLIKELY(checkRegisterData(family, dst[0], fn, whyNot) == false)) + return false; + if (UNLIKELY(src + 3u > fn.tupleNum())) { + whyNot = "Out-of-bound index for ternary instruction"; + return false; + } + for (uint32_t srcID = 0; srcID < 3; ++srcID) { + const Register regID = fn.getRegister(src, srcID); + if (UNLIKELY(checkRegisterData(family, regID, fn, whyNot) == false)) + return false; + } + return true; + } + /*! Loads and stores follow the same restrictions */ template INLINE bool wellFormedLoadStore(const T &insn, const Function &fn, std::string &whyNot) @@ -934,6 +977,10 @@ namespace ir { ternaryOrSelectOut(*this, out, fn); } + INLINE void TernaryInstruction::out(std::ostream &out, const Function &fn) const { + ternaryOrSelectOut(*this, out, fn); + } + INLINE void AtomicInstruction::out(std::ostream &out, const Function &fn) const { this->outOpcode(out); out << "." << addrSpace; @@ -1077,6 +1124,10 @@ START_INTROSPECTION(SelectInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(SelectInstruction) +START_INTROSPECTION(TernaryInstruction) +#include "ir/instruction.hxx" +END_INTROSPECTION(TernaryInstruction) + START_INTROSPECTION(BranchInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(BranchInstruction) @@ -1259,6 +1310,7 @@ DECL_MEM_FN(UnaryInstruction, Type, getType(void), getType()) DECL_MEM_FN(BinaryInstruction, Type, getType(void), getType()) DECL_MEM_FN(BinaryInstruction, bool, commutes(void), commutes()) DECL_MEM_FN(SelectInstruction, Type, getType(void), getType()) +DECL_MEM_FN(TernaryInstruction, Type, getType(void), getType()) DECL_MEM_FN(CompareInstruction, Type, getType(void), getType()) DECL_MEM_FN(ConvertInstruction, Type, getSrcType(void), getSrcType()) DECL_MEM_FN(ConvertInstruction, Type, getDstType(void), getDstType()) @@ -1359,6 +1411,10 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType()) return internal::SelectInstruction(type, dst, src).convert(); } + Instruction I64MADSAT(Type type, Register dst, Tuple src) { + return internal::TernaryInstruction(OP_I64MADSAT, type, dst, src).convert(); + } + // All compare functions #define DECL_EMIT_FUNCTION(NAME) \ Instruction NAME(Type type, Register dst, Register src0, Register src1) { \ diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index f165595..0f7df58 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -206,6 +206,13 @@ namespace ir { static bool isClassOf(const Instruction &insn); }; + /*! Ternary instructions are typed. dst and sources share the same type */ + class TernaryInstruction : public Instruction { + public: + Type getType(void) const; + static bool isClassOf(const Instruction &insn); + }; + /*! Select instructions writes src0 to dst if cond is true. Otherwise, it * writes src1 */ @@ -521,6 +528,8 @@ namespace ir { Instruction MUL_HI(Type type, Register dst, Register src0, Register src1); /*! i64_mul_hi.type dst src */ Instruction I64_MUL_HI(Type type, Register dst, Register src0, Register src1); + /*! i64madsat.type dst src */ + Instruction I64MADSAT(Type type, Register dst, Tuple src); /*! upsample_short.type dst src */ Instruction UPSAMPLE_SHORT(Type type, Register dst, Register src0, Register src1); /*! upsample_int.type dst src */ diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index 135dc82..f3f2db6 100644 --- a/backend/src/ir/instruction.hxx +++ b/backend/src/ir/instruction.hxx @@ -83,3 +83,4 @@ DECL_INSN(I64RHADD, BinaryInstruction) DECL_INSN(UPSAMPLE_SHORT, BinaryInstruction) DECL_INSN(UPSAMPLE_INT, BinaryInstruction) DECL_INSN(UPSAMPLE_LONG, BinaryInstruction) +DECL_INSN(I64MADSAT, TernaryInstruction) diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index ffc05bf..05b5874 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -1905,6 +1905,8 @@ namespace gbe case GEN_OCL_RHADD: case GEN_OCL_I64HADD: case GEN_OCL_I64RHADD: + case GEN_OCL_I64_MAD_SAT: + case GEN_OCL_I64_MAD_SATU: this->newRegister(&I); break; default: @@ -2343,6 +2345,24 @@ namespace gbe ctx.SUBSAT(getUnsignedType(ctx, I.getType()), dst, src0, src1); break; } + case GEN_OCL_I64_MAD_SAT: + { + GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; + GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; + GBE_ASSERT(AI != AE); const ir::Register src2 = this->getRegister(*AI); ++AI; + const ir::Register dst = this->getRegister(&I); + ctx.I64MADSAT(getType(ctx, I.getType()), dst, src0, src1, src2); + break; + } + case GEN_OCL_I64_MAD_SATU: + { + GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; + GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; + GBE_ASSERT(AI != AE); const ir::Register src2 = this->getRegister(*AI); ++AI; + const ir::Register dst = this->getRegister(&I); + ctx.I64MADSAT(getUnsignedType(ctx, I.getType()), dst, src0, src1, src2); + break; + } case GEN_OCL_HADD: { GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 7b5a2d3..5ea879c 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -125,6 +125,9 @@ DECL_LLVM_GEN_FUNCTION(USUB_SAT_SHORT, _Z12ocl_usub_sattt) DECL_LLVM_GEN_FUNCTION(USUB_SAT_INT, _Z12ocl_usub_satjj) DECL_LLVM_GEN_FUNCTION(USUB_SAT_LONG, _Z12ocl_usub_satmm) +DECL_LLVM_GEN_FUNCTION(I64_MAD_SAT, _Z17__gen_ocl_mad_satlll) +DECL_LLVM_GEN_FUNCTION(I64_MAD_SATU, _Z17__gen_ocl_mad_satmmm) + // integer built-in functions DECL_LLVM_GEN_FUNCTION(MUL_HI_INT, _Z16__gen_ocl_mul_hiii) DECL_LLVM_GEN_FUNCTION(MUL_HI_UINT, _Z16__gen_ocl_mul_hijj) diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index ea9ae6e..fa1ba37 100644 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -423,12 +423,15 @@ INLINE_OVERLOADABLE uint mad_sat(uint a, uint b, uint c) { return (uint)x; } +OVERLOADABLE long __gen_ocl_mad_sat(long a, long b, long c); +OVERLOADABLE ulong __gen_ocl_mad_sat(ulong a, ulong b, ulong c); + INLINE_OVERLOADABLE long mad_sat(long a, long b, long c) { - return 0; + return __gen_ocl_mad_sat(a, b, c); } INLINE_OVERLOADABLE ulong mad_sat(ulong a, ulong b, ulong c) { - return 0; + return __gen_ocl_mad_sat(a, b, c); } INLINE_OVERLOADABLE uchar __rotate_left(uchar x, uchar y) { return (x << y) | (x >> (8 - y)); }