[GEN_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 },
[GEN_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 },
[GEN_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
+ [GEN_OPCODE_FBH] = { .name = "fbh", .nsrc = 1, .ndst = 1 },
+ [GEN_OPCODE_FBL] = { .name = "fbl", .nsrc = 1, .ndst = 1 },
[GEN_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
[GEN_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
const GenRegister src = ra->genReg(insn.src(0));
switch (insn.opcode) {
case SEL_OP_MOV: p->MOV(dst, src); break;
+ case SEL_OP_FBH: p->FBH(dst, src); break;
+ case SEL_OP_FBL: p->FBL(dst, src); break;
case SEL_OP_NOT: p->NOT(dst, src); break;
case SEL_OP_RNDD: p->RNDD(dst, src); break;
case SEL_OP_RNDU: p->RNDU(dst, src); break;
GEN_OPCODE_MAC = 72,
GEN_OPCODE_MACH = 73,
GEN_OPCODE_LZD = 74,
+ GEN_OPCODE_FBH = 75,
+ GEN_OPCODE_FBL = 76,
GEN_OPCODE_SAD2 = 80,
GEN_OPCODE_SADA2 = 81,
GEN_OPCODE_DP4 = 84,
ALU1(RNDE)
ALU1(RNDD)
ALU1(RNDU)
+ ALU1(FBH)
+ ALU1(FBL)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
#define ALU2(OP) void OP(GenRegister dest, GenRegister src0, GenRegister src1);
#define ALU3(OP) void OP(GenRegister dest, GenRegister src0, GenRegister src1, GenRegister src2);
ALU1(MOV)
+ ALU1(FBH)
+ ALU1(FBL)
ALU1(RNDZ)
ALU1(RNDE)
ALU1(RNDD)
ALU2(MACH)
ALU1(LZD)
ALU3(MAD)
+ ALU1(FBH)
+ ALU1(FBL)
#undef ALU1
#undef ALU2
#undef ALU3
/*! Unary instruction patterns */
DECL_PATTERN(UnaryInstruction)
{
+ /*! Register type used to select both operands of the given unary opcode:
+  *  U32 for the bit-scan opcodes (FBH / FBL), FLOAT for everything else */
+ static ir::Type getType(const ir::Opcode opcode) {
+   switch (opcode) {
+     case ir::OP_FBH:
+     case ir::OP_FBL:
+       return ir::TYPE_U32;
+     default:
+       return ir::TYPE_FLOAT;
+   }
+ }
+
INLINE bool emitOne(Selection::Opaque &sel, const ir::UnaryInstruction &insn) const {
const ir::Opcode opcode = insn.getOpcode();
- const GenRegister dst = sel.selReg(insn.getDst(0));
- const GenRegister src = sel.selReg(insn.getSrc(0));
+ const GenRegister dst = sel.selReg(insn.getDst(0), getType(opcode));
+ const GenRegister src = sel.selReg(insn.getSrc(0), getType(opcode));
switch (opcode) {
case ir::OP_ABS: sel.MOV(dst, GenRegister::abs(src)); break;
case ir::OP_MOV:
case ir::OP_RNDE: sel.RNDE(dst, src); break;
case ir::OP_RNDU: sel.RNDU(dst, src); break;
case ir::OP_RNDZ: sel.RNDZ(dst, src); break;
+ case ir::OP_FBH: sel.FBH(dst, src); break;
+ case ir::OP_FBL: sel.FBL(dst, src); break;
case ir::OP_COS: sel.MATH(dst, GEN_MATH_FUNCTION_COS, src); break;
case ir::OP_SIN: sel.MATH(dst, GEN_MATH_FUNCTION_SIN, src); break;
case ir::OP_LOG: sel.MATH(dst, GEN_MATH_FUNCTION_LOG, src); break;
DECL_SELECTION_IR(SAMPLE, SampleInstruction)
DECL_SELECTION_IR(TYPED_WRITE, TypedWriteInstruction)
DECL_SELECTION_IR(GET_IMAGE_INFO, GetImageInfoInstruction)
+DECL_SELECTION_IR(FBH, UnaryInstruction)
+DECL_SELECTION_IR(FBL, UnaryInstruction)
}
DECL_EMIT_FUNCTION(MOV)
+ DECL_EMIT_FUNCTION(FBH)
+ DECL_EMIT_FUNCTION(FBL)
DECL_EMIT_FUNCTION(COS)
DECL_EMIT_FUNCTION(SIN)
DECL_EMIT_FUNCTION(LOG)
Instruction COS(Type type, Register dst, Register src);
/*! sin.type dst src */
Instruction SIN(Type type, Register dst, Register src);
+ /*! fbh.type dst src */
+ Instruction FBH(Type type, Register dst, Register src);
+ /*! fbl.type dst src */
+ Instruction FBL(Type type, Register dst, Register src);
/*! rcp.type dst src */
Instruction RCP(Type type, Register dst, Register src);
/*! abs.type dst src */
DECL_INSN(SYNC, SyncInstruction)
DECL_INSN(LABEL, LabelInstruction)
DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction)
+DECL_INSN(FBH, UnaryInstruction)
+DECL_INSN(FBL, UnaryInstruction)
regTranslator.newScalarProxy(ir::ocl::goffset2, dst); break;
case GEN_OCL_GET_WORK_DIM:
regTranslator.newScalarProxy(ir::ocl::workdim, dst); break;
+ case GEN_OCL_FBH:
+ case GEN_OCL_FBL:
case GEN_OCL_COS:
case GEN_OCL_SIN:
case GEN_OCL_SQR:
ctx.POW(ir::TYPE_FLOAT, dst, src0, src1);
break;
}
+ case GEN_OCL_FBH: this->emitUnaryCallInst(I,CS,ir::OP_FBH); break;
+ case GEN_OCL_FBL: this->emitUnaryCallInst(I,CS,ir::OP_FBL); break;
case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break;
case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break;
case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break;
DECL_LLVM_GEN_FUNCTION(USUB_SAT_SHORT, _Z12ocl_usub_sattt)
DECL_LLVM_GEN_FUNCTION(USUB_SAT_INT, _Z12ocl_usub_satjj)
DECL_LLVM_GEN_FUNCTION(USUB_SAT_LONG, _Z12ocl_usub_satmm)
+
+// integer built-in functions
+DECL_LLVM_GEN_FUNCTION(FBH, __gen_ocl_fbh)
+DECL_LLVM_GEN_FUNCTION(FBL, __gen_ocl_fbl)
#undef DEC4
#undef DEC8
#undef DEC16
-
+/////////////////////////////////////////////////////////////////////////////
+// Integer built-in functions
+/////////////////////////////////////////////////////////////////////////////
+ // Backend intrinsics: the compiler front end recognises these two names and
+ // lowers them to the FBH and FBL unary opcodes respectively.
+ // The short/int clz overloads return __gen_ocl_fbh's result (less a width
+ // adjustment) as the leading-zero count of a non-zero operand.
+ // NOTE(review): __gen_ocl_fbl presumably reports the position of the lowest
+ // set bit -- confirm against the hardware ISA documentation.
+ PURE CONST uint __gen_ocl_fbh(uint);
+ PURE CONST uint __gen_ocl_fbl(uint);
+
+ // Count leading zeros of a signed 8-bit value.
+ // Returns 0 for a negative operand and 8 for zero. Otherwise the operand is
+ // widened to the uint parameter of __gen_ocl_fbh, so the 24 extra high-order
+ // bits are subtracted from the result.
+ // NOTE(review): relies on __gen_ocl_fbh yielding the 32-bit leading-zero count
+ // of a non-zero operand, exactly as the short/int clz overloads do.
+ INLINE_OVERLOADABLE char clz(char x) {
+   if (x < 0)
+     return 0;
+   if (x == 0)
+     return 8;
+   // Use fbh like every other width; this overload originally called
+   // __gen_ocl_fbl, which does not match the "- 24" width adjustment.
+   return __gen_ocl_fbh(x) - 24;
+ }
+
+ // Count leading zeros of an unsigned 8-bit value.
+ // Returns 8 for zero. Otherwise the operand is widened to the uint parameter
+ // of __gen_ocl_fbh, so the 24 extra high-order bits are subtracted.
+ // NOTE(review): relies on __gen_ocl_fbh yielding the 32-bit leading-zero count
+ // of a non-zero operand, exactly as the ushort/uint clz overloads do.
+ INLINE_OVERLOADABLE uchar clz(uchar x) {
+   if (x == 0)
+     return 8;
+   // Use fbh like every other width; this overload originally called
+   // __gen_ocl_fbl, which does not match the "- 24" width adjustment.
+   return __gen_ocl_fbh(x) - 24;
+ }
+
+ // clz for a signed 16-bit value: 0 when negative, 16 when zero, otherwise the
+ // __gen_ocl_fbh result less the 16 bits gained by widening the operand to uint.
+ INLINE_OVERLOADABLE short clz(short x) {
+   if (x <= 0)
+     return (x == 0) ? 16 : 0;
+   return __gen_ocl_fbh(x) - 16;
+ }
+
+ // clz for an unsigned 16-bit value: 16 when zero, otherwise the
+ // __gen_ocl_fbh result less the 16 bits gained by widening the operand to uint.
+ INLINE_OVERLOADABLE ushort clz(ushort x) {
+   return (x != 0) ? __gen_ocl_fbh(x) - 16 : 16;
+ }
+
+ // clz for a signed 32-bit value: 0 when negative, 32 when zero, otherwise
+ // whatever __gen_ocl_fbh reports for the operand.
+ INLINE_OVERLOADABLE int clz(int x) {
+   if (x > 0)
+     return __gen_ocl_fbh(x);
+   return (x < 0) ? 0 : 32;
+ }
+
+ // clz for an unsigned 32-bit value: 32 when zero, otherwise whatever
+ // __gen_ocl_fbh reports for the operand.
+ INLINE_OVERLOADABLE uint clz(uint x) {
+   return (x != 0) ? __gen_ocl_fbh(x) : 32;
+ }
+
+ // Vector overloads of clz for 2/3/4/8/16-wide char, uchar, short, ushort, int
+ // and uint: each applies the scalar clz of the same element type per component.
+ // The result has to be an explicit vector literal, (typeN)(...). A bare
+ // parenthesised list is a comma expression that keeps only its last operand,
+ // which would then be broadcast to every component of the returned vector.
+ #define DEC2(type) INLINE_OVERLOADABLE type##2 clz(type##2 a) { return (type##2)(clz(a.s0), clz(a.s1)); }
+ #define DEC3(type) INLINE_OVERLOADABLE type##3 clz(type##3 a) { return (type##3)(clz(a.s0), clz(a.s1), clz(a.s2)); }
+ #define DEC4(type) INLINE_OVERLOADABLE type##4 clz(type##4 a) { return (type##4)(clz(a.s0), clz(a.s1), clz(a.s2), clz(a.s3)); }
+ #define DEC8(type) INLINE_OVERLOADABLE type##8 clz(type##8 a) { return (type##8)(clz(a.s0), clz(a.s1), clz(a.s2), clz(a.s3), clz(a.s4), clz(a.s5), clz(a.s6), clz(a.s7)); }
+ #define DEC16(type) INLINE_OVERLOADABLE type##16 clz(type##16 a) { return (type##16)(clz(a.s0), clz(a.s1), clz(a.s2), clz(a.s3), clz(a.s4), clz(a.s5), clz(a.s6), clz(a.s7), clz(a.s8), clz(a.s9), clz(a.sa), clz(a.sb), clz(a.sc), clz(a.sd), clz(a.se), clz(a.sf)); }
+ // Instantiate one width for every supported element type.
+ #define DEC(n) DEC##n(char); DEC##n(uchar); DEC##n(short); DEC##n(ushort); DEC##n(int); DEC##n(uint)
+ DEC(2)
+ DEC(3)
+ DEC(4)
+ DEC(8)
+ DEC(16)
+ #undef DEC
+ #undef DEC2
+ #undef DEC3
+ #undef DEC4
+ #undef DEC8
+ #undef DEC16
/////////////////////////////////////////////////////////////////////////////
// Work Items functions (see 6.11.1 of OCL 1.1 spec)
/////////////////////////////////////////////////////////////////////////////