const GenRegister dst = sel.selReg(insn.getDst(0), getType(opcode));
const GenRegister src = sel.selReg(insn.getSrc(0), getType(opcode));
switch (opcode) {
- case ir::OP_ABS: sel.MOV(dst, GenRegister::abs(src)); break;
+ case ir::OP_ABS: // Absolute value: signed 32-bit integer and float operands are handled separately.
+ if (insn.getType() == ir::TYPE_S32) {
+ const GenRegister src_ = GenRegister::retype(src, GEN_TYPE_D); // view the source as GEN_TYPE_D for the integer case
+ const GenRegister dst_ = GenRegister::retype(dst, GEN_TYPE_D); // same retype for the destination
+ sel.MOV(dst_, GenRegister::abs(src_)); // NOTE(review): retype presumably needed because getType(opcode) above is not a signed dword for OP_ABS - confirm
+ } else {
+ GBE_ASSERT(insn.getType() == ir::TYPE_FLOAT); // the only other instruction type accepted here is float
+ sel.MOV(dst, GenRegister::abs(src)); // float case keeps dst/src with the type they were selected with
+ }
+ break;
case ir::OP_MOV:
if (dst.isdf()) {
ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
case GEN_OCL_POW:
case GEN_OCL_RCP:
case GEN_OCL_ABS:
+ case GEN_OCL_FABS:
case GEN_OCL_RNDZ:
case GEN_OCL_RNDE:
case GEN_OCL_RNDU:
}
case GEN_OCL_FBH: this->emitUnaryCallInst(I,CS,ir::OP_FBH); break;
case GEN_OCL_FBL: this->emitUnaryCallInst(I,CS,ir::OP_FBL); break;
+ case GEN_OCL_ABS: // Integer abs (__gen_ocl_abs, declared int -> int): emitted here directly rather than through emitUnaryCallInst.
+ {
+ const ir::Register src = this->getRegister(*AI); // register for *AI (presumably the call's first argument - confirm)
+ const ir::Register dst = this->getRegister(&I); // register for the call instruction I itself
+ ctx.ALU1(ir::OP_ABS, ir::TYPE_S32, dst, src); // type is fixed to TYPE_S32, the case the OP_ABS selection treats as integer
+ break;
+ }
case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break;
case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break;
case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break;
case GEN_OCL_SQR: this->emitUnaryCallInst(I,CS,ir::OP_SQR); break;
case GEN_OCL_RSQ: this->emitUnaryCallInst(I,CS,ir::OP_RSQ); break;
case GEN_OCL_RCP: this->emitUnaryCallInst(I,CS,ir::OP_RCP); break;
- case GEN_OCL_ABS: this->emitUnaryCallInst(I,CS,ir::OP_ABS); break;
+ case GEN_OCL_FABS: this->emitUnaryCallInst(I,CS,ir::OP_ABS); break; // __gen_ocl_fabs keeps the generic unary path onto ir::OP_ABS; GEN_OCL_ABS is now the integer builtin
case GEN_OCL_RNDZ: this->emitUnaryCallInst(I,CS,ir::OP_RNDZ); break;
case GEN_OCL_RNDE: this->emitUnaryCallInst(I,CS,ir::OP_RNDE); break;
case GEN_OCL_RNDU: this->emitUnaryCallInst(I,CS,ir::OP_RNDU); break;
DECL_LLVM_GEN_FUNCTION(GET_WORK_DIM, __gen_ocl_get_work_dim)
// Math function
-DECL_LLVM_GEN_FUNCTION(ABS, __gen_ocl_fabs)
+DECL_LLVM_GEN_FUNCTION(FABS, __gen_ocl_fabs) // renamed from ABS so that ABS can name the integer builtin declared below
DECL_LLVM_GEN_FUNCTION(COS, __gen_ocl_cos)
DECL_LLVM_GEN_FUNCTION(SIN, __gen_ocl_sin)
DECL_LLVM_GEN_FUNCTION(SQR, __gen_ocl_sqrt)
// integer built-in functions
DECL_LLVM_GEN_FUNCTION(FBH, __gen_ocl_fbh)
DECL_LLVM_GEN_FUNCTION(FBL, __gen_ocl_fbl)
+DECL_LLVM_GEN_FUNCTION(ABS, __gen_ocl_abs) // integer abs: binds __gen_ocl_abs to GEN_OCL_ABS
#undef DEC8
#undef DEC16
+/* Integer abs() overloads.
+ * __gen_ocl_abs is the scalar int primitive that every signed overload below
+ * is built on. char and short operands are converted to int by the call, and
+ * each result is cast to the matching u<TYPE> type, which is what the signed
+ * overloads return. */
+int __gen_ocl_abs(int x);
+/* ABS_I(I, CVT): abs of vector component x.s<I>, cast to CVT. */
+#define ABS_I(I, CVT) (CVT)__gen_ocl_abs(x.s##I)
+/* ABS_VEC<N>(CVT): comma-separated list of the N per-component results
+ * (ABS_VEC1 is the scalar case and uses x itself). Each width extends the
+ * previous one, so only the extra components are spelled out. */
+#define ABS_VEC1(CVT) (CVT)__gen_ocl_abs(x)
+#define ABS_VEC2(CVT) ABS_I(0, CVT), ABS_I(1, CVT)
+#define ABS_VEC4(CVT) ABS_VEC2(CVT), ABS_I(2, CVT), ABS_I(3, CVT)
+#define ABS_VEC8(CVT) ABS_VEC4(CVT), ABS_I(4, CVT), ABS_I(5, CVT),\
+ ABS_I(6, CVT), ABS_I(7, CVT)
+#define ABS_VEC16(CVT) ABS_VEC8(CVT), ABS_I(8, CVT), ABS_I(9, CVT), \
+ ABS_I(A, CVT), ABS_I(B, CVT), ABS_I(C, CVT), \
+ ABS_I(D, CVT), ABS_I(E, CVT), ABS_I(F, CVT)
+
+/* Signed types: DEC_1 defines the scalar overload, DEC_N the N-wide vector
+ * overload (built as a u<TYPE><N> literal from the ABS_VEC<N> list), and DEC
+ * instantiates the scalar plus the 2/4/8/16-wide overloads for one TYPE.
+ * The generated function definitions must not be followed by ';': that would
+ * leave an empty declaration at file scope after every overload. */
+#define DEC_1(TYPE) INLINE_OVERLOADABLE u##TYPE abs(TYPE x) { return ABS_VEC1(u##TYPE); }
+#define DEC_N(TYPE, N) INLINE_OVERLOADABLE u##TYPE##N abs(TYPE##N x) { return (u##TYPE##N)(ABS_VEC##N(u##TYPE)); }
+#define DEC(TYPE) DEC_1(TYPE) DEC_N(TYPE, 2) DEC_N(TYPE, 4) DEC_N(TYPE, 8) DEC_N(TYPE, 16)
+
+DEC(int)
+DEC(short)
+DEC(char)
+#undef DEC_1
+#undef DEC_N
+/* Unsigned types: abs() is the identity, so DEC_1/DEC_N are redefined to
+ * return the argument unchanged while DEC is reused as is. */
+#define DEC_1(TYPE) INLINE_OVERLOADABLE TYPE abs(TYPE x) { return x; }
+#define DEC_N(TYPE, N) INLINE_OVERLOADABLE TYPE##N abs(TYPE##N x) { return x; }
+DEC(uint)
+DEC(ushort)
+DEC(uchar)
+/* Drop every helper macro so none of them leaks into user kernels. */
+#undef DEC
+#undef DEC_1
+#undef DEC_N
+#undef ABS_I
+#undef ABS_VEC1
+#undef ABS_VEC2
+#undef ABS_VEC4
+#undef ABS_VEC8
+#undef ABS_VEC16
+
+
/////////////////////////////////////////////////////////////////////////////
// Work Items functions (see 6.11.1 of OCL 1.1 spec)
/////////////////////////////////////////////////////////////////////////////