From 285c848244815511563c60af6a78cea717206706 Mon Sep 17 00:00:00 2001
From: Homer Hsing
Date: Tue, 2 Jul 2013 14:45:10 +0800
Subject: [PATCH] support built-in functions "mul_hi", "mad_hi"

Signed-off-by: Homer Hsing
Reviewed-by: Song, Ruiling
---
Notes (text between "---" and the first "diff --git" is not part of the
commit message):
 * SEL_OP_MUL_HI is emitted as a MUL into the accumulator followed by a MACH,
   with the execution width forced to 8. When the original execution width is
   16 the same sequence is repeated on GenRegister::Qn(..., 1) of the operands.
 * The vector overloads of mul_hi/mad_hi in ocl_stdlib.h build their result
   with an explicit (typeN)(...) vector literal. A bare parenthesised list
   "(e0, e1, ...)" is a comma expression and only yields its last operand.
 * The ushort overload of mul_hi multiplies as uint, so the 16x16-bit product
   cannot overflow a signed int after integer promotion.
 * The mul_hi doc comment in instruction.hpp now lists both source operands.
 * The number of added lines per hunk is unchanged; the "index" blob ids of
   instruction.hpp and ocl_stdlib.h were not regenerated after the edits above.

 backend/src/backend/gen_context.cpp        | 28 ++++++++++++++++
 backend/src/backend/gen_insn_selection.cpp |  6 ++++
 backend/src/backend/gen_insn_selection.hxx |  1 +
 backend/src/ir/instruction.cpp             |  1 +
 backend/src/ir/instruction.hpp             |  2 ++
 backend/src/ir/instruction.hxx             |  1 +
 backend/src/llvm/llvm_gen_backend.cpp      | 18 +++++++++++
 backend/src/llvm/llvm_gen_ocl_function.hxx |  2 ++
 backend/src/ocl_stdlib.h                   | 52 ++++++++++++++++++++++++++++++
 9 files changed, 111 insertions(+)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 62c6378..acd9c1d 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -179,6 +179,34 @@ namespace gbe
     const GenRegister src1 = ra->genReg(insn.src(1));
     const GenRegister src2 = ra->genReg(insn.src(2));
     switch (insn.opcode) {
+      case SEL_OP_MUL_HI:
+       {
+        int w = p->curr.execWidth;
+        p->push();
+          p->curr.execWidth = 8;
+          p->curr.quarterControl = 0;
+          p->push();
+            p->curr.predicate = GEN_PREDICATE_NONE;
+            p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), src0, src1);
+            p->curr.accWrEnable = 1;
+            p->MACH(src2, src0, src1);
+            p->curr.accWrEnable = 0;
+          p->pop();
+          p->MOV(dst, src2);
+          if (w == 16) {
+            p->push();
+              p->curr.predicate = GEN_PREDICATE_NONE;
+              p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
+              p->curr.accWrEnable = 1;
+              p->MACH(src2, GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
+              p->curr.accWrEnable = 0;
+            p->pop();
+            p->curr.quarterControl = 1;
+            p->MOV(GenRegister::Qn(dst, 1), src2);
+          }
+        p->pop();
+        break;
+       }
       case SEL_OP_MAD:  p->MAD(dst, src0, src1, src2); break;
       case SEL_OP_HADD:
        {
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index f356b27..bbe392d 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -433,6 +433,7 @@ namespace gbe
     ALU2(MACH)
     ALU1(LZD)
     ALU3(MAD)
+    ALU3(MUL_HI)
     ALU1(FBH)
     ALU1(FBL)
     ALU3(HADD)
@@ -1425,6 +1426,11 @@ namespace gbe
         case OP_SHL: sel.SHL(dst, src0, src1); break;
         case OP_SHR: sel.SHR(dst, src0, src1); break;
         case OP_ASR: sel.ASR(dst, src0, src1); break;
+        case OP_MUL_HI: {
+            GenRegister temp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), GEN_TYPE_UD);
+            sel.MUL_HI(dst, src0, src1, temp);
+            break;
+          }
         case OP_MUL:
           if (type == TYPE_U32 || type == TYPE_S32) {
             sel.pop();
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 8a81022..c85d328 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -42,6 +42,7 @@ DECL_SELECTION_IR(BYTE_SCATTER, ByteScatterInstruction)
 DECL_SELECTION_IR(SAMPLE, SampleInstruction)
 DECL_SELECTION_IR(TYPED_WRITE, TypedWriteInstruction)
 DECL_SELECTION_IR(GET_IMAGE_INFO, GetImageInfoInstruction)
+DECL_SELECTION_IR(MUL_HI, TernaryInstruction)
 DECL_SELECTION_IR(FBH, UnaryInstruction)
 DECL_SELECTION_IR(FBL, UnaryInstruction)
 DECL_SELECTION_IR(HADD, TernaryInstruction)
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index bd854a4..21b82ce 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1329,6 +1329,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType())
   DECL_EMIT_FUNCTION(ADDSAT)
   DECL_EMIT_FUNCTION(SUB)
   DECL_EMIT_FUNCTION(SUBSAT)
+  DECL_EMIT_FUNCTION(MUL_HI)
   DECL_EMIT_FUNCTION(DIV)
   DECL_EMIT_FUNCTION(REM)
   DECL_EMIT_FUNCTION(SHL)
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 3389ee0..fc1c984 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -517,6 +517,8 @@ namespace ir {
   Instruction COS(Type type, Register dst, Register src);
   /*! sin.type dst src */
   Instruction SIN(Type type, Register dst, Register src);
+  /*! mul_hi.type dst src0 src1 */
+  Instruction MUL_HI(Type type, Register dst, Register src0, Register src1);
   /*! fbh.type dst src */
   Instruction FBH(Type type, Register dst, Register src);
   /*! fbl.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 8df393b..0e1c575 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -72,6 +72,7 @@ DECL_INSN(SAMPLE, SampleInstruction)
 DECL_INSN(SYNC, SyncInstruction)
 DECL_INSN(LABEL, LabelInstruction)
 DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction)
+DECL_INSN(MUL_HI, BinaryInstruction)
 DECL_INSN(FBH, UnaryInstruction)
 DECL_INSN(FBL, UnaryInstruction)
 DECL_INSN(HADD, BinaryInstruction)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 88d2dd8..8385e21 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1775,6 +1775,8 @@ namespace gbe
         this->newRegister(&I);
         break;
       }
+      case GEN_OCL_MUL_HI_INT:
+      case GEN_OCL_MUL_HI_UINT:
       case GEN_OCL_SADD_SAT_CHAR:
       case GEN_OCL_SADD_SAT_SHORT:
       case GEN_OCL_SADD_SAT_INT:
@@ -2140,6 +2142,22 @@ namespace gbe
             ctx.TYPED_WRITE(srcTuple, srcType, coordType);
             break;
           }
+          case GEN_OCL_MUL_HI_INT:
+          {
+            GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+            GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+            const ir::Register dst = this->getRegister(&I);
+            ctx.MUL_HI(getType(ctx, I.getType()), dst, src0, src1);
+            break;
+          }
+          case GEN_OCL_MUL_HI_UINT:
+          {
+            GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+            GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+            const ir::Register dst = this->getRegister(&I);
+            ctx.MUL_HI(getUnsignedType(ctx, I.getType()), dst, src0, src1);
+            break;
+          }
           case GEN_OCL_SADD_SAT_CHAR:
           case GEN_OCL_SADD_SAT_SHORT:
           case GEN_OCL_SADD_SAT_INT:
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 89b57fc..f448a50 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -126,6 +126,8 @@ DECL_LLVM_GEN_FUNCTION(USUB_SAT_INT, _Z12ocl_usub_satjj)
 DECL_LLVM_GEN_FUNCTION(USUB_SAT_LONG, _Z12ocl_usub_satmm)
 
 // integer built-in functions
+DECL_LLVM_GEN_FUNCTION(MUL_HI_INT, _Z16__gen_ocl_mul_hiii)
+DECL_LLVM_GEN_FUNCTION(MUL_HI_UINT, _Z16__gen_ocl_mul_hijj)
 DECL_LLVM_GEN_FUNCTION(FBH, __gen_ocl_fbh)
 DECL_LLVM_GEN_FUNCTION(FBL, __gen_ocl_fbl)
 DECL_LLVM_GEN_FUNCTION(ABS, __gen_ocl_abs)
diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
index d4cf01f..04984d8 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -4356,6 +4356,58 @@ DEC(16)
 #undef DEC8
 #undef DEC16
 
+OVERLOADABLE int __gen_ocl_mul_hi(int x, int y);
+OVERLOADABLE uint __gen_ocl_mul_hi(uint x, uint y);
+INLINE_OVERLOADABLE char mul_hi(char x, char y) { return (x * y) >> 8; }
+INLINE_OVERLOADABLE uchar mul_hi(uchar x, uchar y) { return (x * y) >> 8; }
+INLINE_OVERLOADABLE short mul_hi(short x, short y) { return (x * y) >> 16; }
+INLINE_OVERLOADABLE ushort mul_hi(ushort x, ushort y) { return ((uint)x * (uint)y) >> 16; }
+INLINE_OVERLOADABLE int mul_hi(int x, int y) { return __gen_ocl_mul_hi(x, y); }
+INLINE_OVERLOADABLE uint mul_hi(uint x, uint y) { return __gen_ocl_mul_hi(x, y); }
+#define DEC2(type) INLINE_OVERLOADABLE type##2 mul_hi(type##2 a, type##2 b) { return (type##2)(mul_hi(a.s0, b.s0), mul_hi(a.s1, b.s1)); }
+#define DEC3(type) INLINE_OVERLOADABLE type##3 mul_hi(type##3 a, type##3 b) { return (type##3)(mul_hi(a.s0, b.s0), mul_hi(a.s1, b.s1), mul_hi(a.s2, b.s2)); }
+#define DEC4(type) INLINE_OVERLOADABLE type##4 mul_hi(type##4 a, type##4 b) { return (type##4)(mul_hi(a.s0, b.s0), mul_hi(a.s1, b.s1), mul_hi(a.s2, b.s2), mul_hi(a.s3, b.s3)); }
+#define DEC8(type) INLINE_OVERLOADABLE type##8 mul_hi(type##8 a, type##8 b) { return (type##8)(mul_hi(a.s0, b.s0), mul_hi(a.s1, b.s1), mul_hi(a.s2, b.s2), mul_hi(a.s3, b.s3), mul_hi(a.s4, b.s4), mul_hi(a.s5, b.s5), mul_hi(a.s6, b.s6), mul_hi(a.s7, b.s7)); }
+#define DEC16(type) INLINE_OVERLOADABLE type##16 mul_hi(type##16 a, type##16 b) { return (type##16)(mul_hi(a.s0, b.s0), mul_hi(a.s1, b.s1), mul_hi(a.s2, b.s2), mul_hi(a.s3, b.s3), mul_hi(a.s4, b.s4), mul_hi(a.s5, b.s5), mul_hi(a.s6, b.s6), mul_hi(a.s7, b.s7), mul_hi(a.s8, b.s8), mul_hi(a.s9, b.s9), mul_hi(a.sa, b.sa), mul_hi(a.sb, b.sb), mul_hi(a.sc, b.sc), mul_hi(a.sd, b.sd), mul_hi(a.se, b.se), mul_hi(a.sf, b.sf)); }
+#define DEF(n) DEC##n(char); DEC##n(uchar); DEC##n(short); DEC##n(ushort); DEC##n(int); DEC##n(uint)
+DEF(2)
+DEF(3)
+DEF(4)
+DEF(8)
+DEF(16)
+#undef DEF
+#undef DEC2
+#undef DEC3
+#undef DEC4
+#undef DEC8
+#undef DEC16
+
+#define DEF(type) INLINE_OVERLOADABLE type mad_hi(type a, type b, type c) { return mul_hi(a, b) + c; }
+DEF(char)
+DEF(uchar)
+DEF(short)
+DEF(ushort)
+DEF(int)
+DEF(uint)
+#undef DEF
+#define DEC2(type) INLINE_OVERLOADABLE type##2 mad_hi(type##2 a, type##2 b, type##2 c) { return (type##2)(mad_hi(a.s0, b.s0, c.s0), mad_hi(a.s1, b.s1, c.s1)); }
+#define DEC3(type) INLINE_OVERLOADABLE type##3 mad_hi(type##3 a, type##3 b, type##3 c) { return (type##3)(mad_hi(a.s0, b.s0, c.s0), mad_hi(a.s1, b.s1, c.s1), mad_hi(a.s2, b.s2, c.s2)); }
+#define DEC4(type) INLINE_OVERLOADABLE type##4 mad_hi(type##4 a, type##4 b, type##4 c) { return (type##4)(mad_hi(a.s0, b.s0, c.s0), mad_hi(a.s1, b.s1, c.s1), mad_hi(a.s2, b.s2, c.s2), mad_hi(a.s3, b.s3, c.s3)); }
+#define DEC8(type) INLINE_OVERLOADABLE type##8 mad_hi(type##8 a, type##8 b, type##8 c) { return (type##8)(mad_hi(a.s0, b.s0, c.s0), mad_hi(a.s1, b.s1, c.s1), mad_hi(a.s2, b.s2, c.s2), mad_hi(a.s3, b.s3, c.s3), mad_hi(a.s4, b.s4, c.s4), mad_hi(a.s5, b.s5, c.s5), mad_hi(a.s6, b.s6, c.s6), mad_hi(a.s7, b.s7, c.s7)); }
+#define DEC16(type) INLINE_OVERLOADABLE type##16 mad_hi(type##16 a, type##16 b, type##16 c) { return (type##16)(mad_hi(a.s0, b.s0, c.s0), mad_hi(a.s1, b.s1, c.s1), mad_hi(a.s2, b.s2, c.s2), mad_hi(a.s3, b.s3, c.s3), mad_hi(a.s4, b.s4, c.s4), mad_hi(a.s5, b.s5, c.s5), mad_hi(a.s6, b.s6, c.s6), mad_hi(a.s7, b.s7, c.s7), mad_hi(a.s8, b.s8, c.s8), mad_hi(a.s9, b.s9, c.s9), mad_hi(a.sa, b.sa, c.sa), mad_hi(a.sb, b.sb, c.sb), mad_hi(a.sc, b.sc, c.sc), mad_hi(a.sd, b.sd, c.sd), mad_hi(a.se, b.se, c.se), mad_hi(a.sf, b.sf, c.sf)); }
+#define DEF(n) DEC##n(char); DEC##n(uchar); DEC##n(short); DEC##n(ushort); DEC##n(int); DEC##n(uint)
+DEF(2)
+DEF(3)
+DEF(4)
+DEF(8)
+DEF(16)
+#undef DEF
+#undef DEC2
+#undef DEC3
+#undef DEC4
+#undef DEC8
+#undef DEC16
+
 INLINE_OVERLOADABLE uchar __rotate_left(uchar x, uchar y) { return (x << y) | (x >> (8 - y)); }
 INLINE_OVERLOADABLE char __rotate_left(char x, char y) { return __rotate_left((uchar)x, (uchar)y); }
 INLINE_OVERLOADABLE ushort __rotate_left(ushort x, ushort y) { return (x << y) | (x >> (16 - y)); }
-- 
2.7.4