From: Homer Hsing Date: Wed, 26 Jun 2013 05:11:53 +0000 (+0800) Subject: support zero bit counting X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=68d21e7fb41649b554ce50d58878c211ab18513c;p=contrib%2Fbeignet.git support zero bit counting support OpenCL built-in function "clz", returning number of leading zero bits add GEN GPU "fbh", "fbl" instructions, for counting zero bits Signed-off-by: Homer Hsing Reviewed-by: Junyan He --- diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c index 17fc845..9a4e283 100644 --- a/backend/src/backend/gen/gen_mesa_disasm.c +++ b/backend/src/backend/gen/gen_mesa_disasm.c @@ -63,6 +63,8 @@ static const struct { [GEN_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 }, + [GEN_OPCODE_FBH] = { .name = "fbh", .nsrc = 1, .ndst = 1 }, + [GEN_OPCODE_FBL] = { .name = "fbl", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 }, [GEN_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 53ba73c..93d3932 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -139,6 +139,8 @@ namespace gbe const GenRegister src = ra->genReg(insn.src(0)); switch (insn.opcode) { case SEL_OP_MOV: p->MOV(dst, src); break; + case SEL_OP_FBH: p->FBH(dst, src); break; + case SEL_OP_FBL: p->FBL(dst, src); break; case SEL_OP_NOT: p->NOT(dst, src); break; case SEL_OP_RNDD: p->RNDD(dst, src); break; case SEL_OP_RNDU: p->RNDU(dst, src); break; diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp index 9d8db5b..d1ce6b2 100644 --- a/backend/src/backend/gen_defs.hpp +++ b/backend/src/backend/gen_defs.hpp @@ -154,6 +154,8 @@ enum opcode { GEN_OPCODE_MAC = 72, GEN_OPCODE_MACH = 73, GEN_OPCODE_LZD = 74, + 
GEN_OPCODE_FBH = 75, + GEN_OPCODE_FBL = 76, GEN_OPCODE_SAD2 = 80, GEN_OPCODE_SADA2 = 81, GEN_OPCODE_DP4 = 84, diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp index ae981b2..e96678b 100644 --- a/backend/src/backend/gen_encoder.cpp +++ b/backend/src/backend/gen_encoder.cpp @@ -824,6 +824,8 @@ namespace gbe ALU1(RNDE) ALU1(RNDD) ALU1(RNDU) + ALU1(FBH) + ALU1(FBL) ALU2(SEL) ALU1(NOT) ALU2(AND) diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp index 1a5dcf9..88a3e77 100644 --- a/backend/src/backend/gen_encoder.hpp +++ b/backend/src/backend/gen_encoder.hpp @@ -90,6 +90,8 @@ namespace gbe #define ALU2(OP) void OP(GenRegister dest, GenRegister src0, GenRegister src1); #define ALU3(OP) void OP(GenRegister dest, GenRegister src0, GenRegister src1, GenRegister src2); ALU1(MOV) + ALU1(FBH) + ALU1(FBL) ALU1(RNDZ) ALU1(RNDE) ALU1(RNDD) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 1e5f514..8fb2a80 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -431,6 +431,8 @@ namespace gbe ALU2(MACH) ALU1(LZD) ALU3(MAD) + ALU1(FBH) + ALU1(FBL) #undef ALU1 #undef ALU2 #undef ALU3 @@ -1211,10 +1213,16 @@ namespace gbe /*! 
Unary instruction patterns */ DECL_PATTERN(UnaryInstruction) { + static ir::Type getType(const ir::Opcode opcode) { + if (opcode == ir::OP_FBH || opcode == ir::OP_FBL) + return ir::TYPE_U32; + return ir::TYPE_FLOAT; + } + INLINE bool emitOne(Selection::Opaque &sel, const ir::UnaryInstruction &insn) const { const ir::Opcode opcode = insn.getOpcode(); - const GenRegister dst = sel.selReg(insn.getDst(0)); - const GenRegister src = sel.selReg(insn.getSrc(0)); + const GenRegister dst = sel.selReg(insn.getDst(0), getType(opcode)); + const GenRegister src = sel.selReg(insn.getSrc(0), getType(opcode)); switch (opcode) { case ir::OP_ABS: sel.MOV(dst, GenRegister::abs(src)); break; case ir::OP_MOV: @@ -1228,6 +1236,8 @@ namespace gbe case ir::OP_RNDE: sel.RNDE(dst, src); break; case ir::OP_RNDU: sel.RNDU(dst, src); break; case ir::OP_RNDZ: sel.RNDZ(dst, src); break; + case ir::OP_FBH: sel.FBH(dst, src); break; + case ir::OP_FBL: sel.FBL(dst, src); break; case ir::OP_COS: sel.MATH(dst, GEN_MATH_FUNCTION_COS, src); break; case ir::OP_SIN: sel.MATH(dst, GEN_MATH_FUNCTION_SIN, src); break; case ir::OP_LOG: sel.MATH(dst, GEN_MATH_FUNCTION_LOG, src); break; diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx index 4b5525b..cc2be08 100644 --- a/backend/src/backend/gen_insn_selection.hxx +++ b/backend/src/backend/gen_insn_selection.hxx @@ -41,3 +41,5 @@ DECL_SELECTION_IR(BYTE_SCATTER, ByteScatterInstruction) DECL_SELECTION_IR(SAMPLE, SampleInstruction) DECL_SELECTION_IR(TYPED_WRITE, TypedWriteInstruction) DECL_SELECTION_IR(GET_IMAGE_INFO, GetImageInfoInstruction) +DECL_SELECTION_IR(FBH, UnaryInstruction) +DECL_SELECTION_IR(FBL, UnaryInstruction) diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index a57c204..67a4c12 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -1239,6 +1239,8 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType()) } 
DECL_EMIT_FUNCTION(MOV) + DECL_EMIT_FUNCTION(FBH) + DECL_EMIT_FUNCTION(FBL) DECL_EMIT_FUNCTION(COS) DECL_EMIT_FUNCTION(SIN) DECL_EMIT_FUNCTION(LOG) diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 7662b6a..0f3bd34 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -485,6 +485,10 @@ namespace ir { Instruction COS(Type type, Register dst, Register src); /*! sin.type dst src */ Instruction SIN(Type type, Register dst, Register src); + /*! fbh.type dst src */ + Instruction FBH(Type type, Register dst, Register src); + /*! fbl.type dst src */ + Instruction FBL(Type type, Register dst, Register src); /*! tan.type dst src */ Instruction RCP(Type type, Register dst, Register src); /*! abs.type dst src */ diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index 5cf37d2..acfb45a 100644 --- a/backend/src/ir/instruction.hxx +++ b/backend/src/ir/instruction.hxx @@ -71,3 +71,5 @@ DECL_INSN(SAMPLE, SampleInstruction) DECL_INSN(SYNC, SyncInstruction) DECL_INSN(LABEL, LabelInstruction) DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction) +DECL_INSN(FBH, UnaryInstruction) +DECL_INSN(FBL, UnaryInstruction) diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 5b7754c..08500ba 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -1678,6 +1678,8 @@ namespace gbe regTranslator.newScalarProxy(ir::ocl::goffset2, dst); break; case GEN_OCL_GET_WORK_DIM: regTranslator.newScalarProxy(ir::ocl::workdim, dst); break; + case GEN_OCL_FBH: + case GEN_OCL_FBL: case GEN_OCL_COS: case GEN_OCL_SIN: case GEN_OCL_SQR: @@ -1842,6 +1844,8 @@ namespace gbe ctx.POW(ir::TYPE_FLOAT, dst, src0, src1); break; } + case GEN_OCL_FBH: this->emitUnaryCallInst(I,CS,ir::OP_FBH); break; + case GEN_OCL_FBL: this->emitUnaryCallInst(I,CS,ir::OP_FBL); break; case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break; case GEN_OCL_SIN: 
this->emitUnaryCallInst(I,CS,ir::OP_SIN); break; case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break; diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 6cd7298..fe19844 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -96,3 +96,7 @@ DECL_LLVM_GEN_FUNCTION(USUB_SAT_CHAR, _Z12ocl_usub_sathh) DECL_LLVM_GEN_FUNCTION(USUB_SAT_SHORT, _Z12ocl_usub_sattt) DECL_LLVM_GEN_FUNCTION(USUB_SAT_INT, _Z12ocl_usub_satjj) DECL_LLVM_GEN_FUNCTION(USUB_SAT_LONG, _Z12ocl_usub_satmm) + +// integer built-in functions +DECL_LLVM_GEN_FUNCTION(FBH, __gen_ocl_fbh) +DECL_LLVM_GEN_FUNCTION(FBL, __gen_ocl_fbl) diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h index 81a0193..3b191ab 100644 --- a/backend/src/ocl_stdlib.h +++ b/backend/src/ocl_stdlib.h @@ -4290,7 +4290,71 @@ DEC(16); #undef DEC4 #undef DEC8 #undef DEC16 - +///////////////////////////////////////////////////////////////////////////// +// Integer built-in functions +///////////////////////////////////////////////////////////////////////////// +PURE CONST uint __gen_ocl_fbh(uint); +PURE CONST uint __gen_ocl_fbl(uint); + +INLINE_OVERLOADABLE char clz(char x) { + if (x < 0) + return 0; + if (x == 0) + return 8; + return __gen_ocl_fbh(x) - 24; /* fbh scans from the MSB of the 32-bit argument; subtract the 24 bits added by widening */ +} + +INLINE_OVERLOADABLE uchar clz(uchar x) { + if (x == 0) + return 8; + return __gen_ocl_fbh(x) - 24; /* fbh scans from the MSB of the 32-bit argument; subtract the 24 bits added by widening */ +} + +INLINE_OVERLOADABLE short clz(short x) { + if (x < 0) + return 0; + if (x == 0) + return 16; + return __gen_ocl_fbh(x) - 16; +} + +INLINE_OVERLOADABLE ushort clz(ushort x) { + if (x == 0) + return 16; + return __gen_ocl_fbh(x) - 16; +} + +INLINE_OVERLOADABLE int clz(int x) { + if (x < 0) + return 0; + if (x == 0) + return 32; + return __gen_ocl_fbh(x); +} + +INLINE_OVERLOADABLE uint clz(uint x) { + if (x == 0) + return 32; + return __gen_ocl_fbh(x); +} + +#define DEC2(type) INLINE_OVERLOADABLE type##2 clz(type##2 a) { return (type##2)(clz(a.s0), clz(a.s1));
} +#define DEC3(type) INLINE_OVERLOADABLE type##3 clz(type##3 a) { return (type##3)(clz(a.s0), clz(a.s1), clz(a.s2)); } +#define DEC4(type) INLINE_OVERLOADABLE type##4 clz(type##4 a) { return (type##4)(clz(a.s0), clz(a.s1), clz(a.s2), clz(a.s3)); } +#define DEC8(type) INLINE_OVERLOADABLE type##8 clz(type##8 a) { return (type##8)(clz(a.s0), clz(a.s1), clz(a.s2), clz(a.s3), clz(a.s4), clz(a.s5), clz(a.s6), clz(a.s7)); } +#define DEC16(type) INLINE_OVERLOADABLE type##16 clz(type##16 a) { return (type##16)(clz(a.s0), clz(a.s1), clz(a.s2), clz(a.s3), clz(a.s4), clz(a.s5), clz(a.s6), clz(a.s7), clz(a.s8), clz(a.s9), clz(a.sa), clz(a.sb), clz(a.sc), clz(a.sd), clz(a.se), clz(a.sf)); } +#define DEC(n) DEC##n(char); DEC##n(uchar); DEC##n(short); DEC##n(ushort); DEC##n(int); DEC##n(uint) +DEC(2) +DEC(3) +DEC(4) +DEC(8) +DEC(16) +#undef DEC +#undef DEC2 +#undef DEC3 +#undef DEC4 +#undef DEC8 +#undef DEC16 ///////////////////////////////////////////////////////////////////////////// // Work Items functions (see 6.11.1 of OCL 1.1 spec) /////////////////////////////////////////////////////////////////////////////