// Output the binary instruction
switch (opcode) {
case OP_ADD: sel.ADD(dst, src0, src1); break;
+ case OP_ADDSAT: // saturating add: emitted as a regular ADD with the saturate flag set
+ sel.push(); // push/pop bracket the flag change; presumably this keeps it from affecting later instructions (confirm in sel)
+ sel.curr.saturate = GEN_MATH_SATURATE_SATURATE;
+ sel.ADD(dst, src0, src1);
+ sel.pop();
+ break;
case OP_XOR: sel.XOR(dst, src0, src1); break;
case OP_OR: sel.OR(dst, src0, src1); break;
case OP_AND: sel.AND(dst, src0, src1); break;
INLINE bool commutes(void) const {
switch (opcode) {
case OP_ADD:
+ case OP_ADDSAT:
case OP_XOR:
case OP_OR:
case OP_AND:
DECL_EMIT_FUNCTION(POW)
DECL_EMIT_FUNCTION(MUL)
DECL_EMIT_FUNCTION(ADD)
+ DECL_EMIT_FUNCTION(ADDSAT)
DECL_EMIT_FUNCTION(SUB)
DECL_EMIT_FUNCTION(DIV)
DECL_EMIT_FUNCTION(REM)
Instruction MUL(Type type, Register dst, Register src0, Register src1);
/*! add.type dst src0 src1 */
Instruction ADD(Type type, Register dst, Register src0, Register src1);
+ /*! addsat.type dst src0 src1 (saturating add; same operand layout as ADD) */
+ Instruction ADDSAT(Type type, Register dst, Register src0, Register src1);
/*! sub.type dst src0 src1 */
Instruction SUB(Type type, Register dst, Register src0, Register src1);
/*! div.type dst src0 src1 */
DECL_INSN(POW, BinaryInstruction)
DECL_INSN(MUL, BinaryInstruction)
DECL_INSN(ADD, BinaryInstruction)
+DECL_INSN(ADDSAT, BinaryInstruction)
DECL_INSN(SUB, BinaryInstruction)
DECL_INSN(DIV, BinaryInstruction)
DECL_INSN(REM, BinaryInstruction)
return ir::TYPE_S64;
}
+ /*! Map an LLVM integer type to the Gen IR unsigned type of the same width.
+  *  i1 yields TYPE_BOOL and i8/i16/i32/i64 yield TYPE_U8/U16/U32/U64. Any
+  *  other type is rejected by an assertion. ctx is not read here. */
+ static ir::Type getUnsignedType(const ir::Context &ctx, const Type *type)
+ {
+   GBE_ASSERT(type->isIntegerTy() == true);
+   // LLVM integer types are uniqued per context, so testing the bit width is
+   // equivalent to comparing against Type::getIntNTy(type->getContext()).
+   if (type->isIntegerTy(1))
+     return ir::TYPE_BOOL;
+   if (type->isIntegerTy(8))
+     return ir::TYPE_U8;
+   if (type->isIntegerTy(16))
+     return ir::TYPE_U16;
+   if (type->isIntegerTy(32))
+     return ir::TYPE_U32;
+   if (type->isIntegerTy(64))
+     return ir::TYPE_U64;
+   // Unsupported integer width.
+   GBE_ASSERT(0);
+   return ir::TYPE_U64; // only reached if GBE_ASSERT does not stop execution
+ }
+
/*! Type to register family translation */
static ir::RegisterFamily getFamily(const ir::Context &ctx, const Type *type)
{
this->newRegister(&I);
break;
}
+ case GEN_OCL_SADD_SAT_CHAR: // signed saturating add, one label per operand width
+ case GEN_OCL_SADD_SAT_SHORT:
+ case GEN_OCL_SADD_SAT_INT:
+ case GEN_OCL_SADD_SAT_LONG:
+ case GEN_OCL_UADD_SAT_CHAR: // unsigned saturating add, one label per operand width
+ case GEN_OCL_UADD_SAT_SHORT:
+ case GEN_OCL_UADD_SAT_INT:
+ case GEN_OCL_UADD_SAT_LONG:
+ this->newRegister(&I); // all eight variants share this: register a destination for call instruction I (presumably read back via getRegister(&I) at emission; confirm)
+ break;
default:
GBE_ASSERTM(false, "Function call are not supported yet");
};
ctx.TYPED_WRITE(srcTuple, srcType, coordType);
break;
}
+ case GEN_OCL_SADD_SAT_CHAR:
+ case GEN_OCL_SADD_SAT_SHORT:
+ case GEN_OCL_SADD_SAT_INT:
+ case GEN_OCL_SADD_SAT_LONG:
+ {
+   // Read the two operands in argument order, then emit one addsat whose
+   // type is the one getType() derives from the call's return type.
+   GBE_ASSERT(AI != AE);
+   const ir::Register lhs = this->getRegister(*AI);
+   ++AI;
+   GBE_ASSERT(AI != AE);
+   const ir::Register rhs = this->getRegister(*AI);
+   ++AI;
+   const ir::Register result = this->getRegister(&I);
+   ctx.ADDSAT(getType(ctx, I.getType()), result, lhs, rhs);
+   break;
+ }
+ case GEN_OCL_UADD_SAT_CHAR:
+ case GEN_OCL_UADD_SAT_SHORT:
+ case GEN_OCL_UADD_SAT_INT:
+ case GEN_OCL_UADD_SAT_LONG:
+ {
+   // Read the two operands in argument order, then emit one addsat whose
+   // type is the unsigned IR type matching the call's return type.
+   GBE_ASSERT(AI != AE);
+   const ir::Register lhs = this->getRegister(*AI);
+   ++AI;
+   GBE_ASSERT(AI != AE);
+   const ir::Register rhs = this->getRegister(*AI);
+   ++AI;
+   const ir::Register result = this->getRegister(&I);
+   ctx.ADDSAT(getUnsignedType(ctx, I.getType()), result, lhs, rhs);
+   break;
+ }
default: break;
}
}
DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE5, _Z22__gen_ocl_write_imagefjffDv4_f)
DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE2, _Z23__gen_ocl_write_imageuijiiDv4_j)
DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE3, _Z23__gen_ocl_write_imageuijffDv4_j)
+
+/* saturation related functions.
+ * Each symbol is the Itanium-mangled name of an ocl_sadd_sat / ocl_uadd_sat
+ * overload taking two arguments of one type. The two trailing letters encode
+ * that type: c=char, s=short, i=int, l=long, h=uchar, t=ushort, j=uint,
+ * m=ulong. */
+DECL_LLVM_GEN_FUNCTION(SADD_SAT_CHAR, _Z12ocl_sadd_satcc)
+DECL_LLVM_GEN_FUNCTION(SADD_SAT_SHORT, _Z12ocl_sadd_satss)
+DECL_LLVM_GEN_FUNCTION(SADD_SAT_INT, _Z12ocl_sadd_satii)
+DECL_LLVM_GEN_FUNCTION(SADD_SAT_LONG, _Z12ocl_sadd_satll)
+DECL_LLVM_GEN_FUNCTION(UADD_SAT_CHAR, _Z12ocl_uadd_sathh)
+DECL_LLVM_GEN_FUNCTION(UADD_SAT_SHORT, _Z12ocl_uadd_sattt)
+DECL_LLVM_GEN_FUNCTION(UADD_SAT_INT, _Z12ocl_uadd_satjj)
+DECL_LLVM_GEN_FUNCTION(UADD_SAT_LONG, _Z12ocl_uadd_satmm)
#undef DECL
#undef DEF
+/* add_sat(x, y) for every built-in integer type. Each overload forwards to a
+ * helper that is only declared here, never defined: ocl_sadd_sat for the
+ * signed types and ocl_uadd_sat for the unsigned ones. */
+#define ADD_SAT_DEF(T, HELPER) \
+INLINE_OVERLOADABLE T HELPER(T x, T y); \
+INLINE_OVERLOADABLE T add_sat(T x, T y) { return HELPER(x, y); }
+ADD_SAT_DEF(char, ocl_sadd_sat);
+ADD_SAT_DEF(short, ocl_sadd_sat);
+ADD_SAT_DEF(int, ocl_sadd_sat);
+ADD_SAT_DEF(long, ocl_sadd_sat);
+ADD_SAT_DEF(uchar, ocl_uadd_sat);
+ADD_SAT_DEF(ushort, ocl_uadd_sat);
+ADD_SAT_DEF(uint, ocl_uadd_sat);
+ADD_SAT_DEF(ulong, ocl_uadd_sat);
+#undef ADD_SAT_DEF
+
#define DEC2(name) INLINE_OVERLOADABLE int2 name(float2 x) { return (name(x.s0), name(x.s1)); }
#define DEC3(name) INLINE_OVERLOADABLE int3 name(float3 x) { return (name(x.s0), name(x.s1), name(x.s2)); }
#define DEC4(name) INLINE_OVERLOADABLE int4 name(float4 x) { return (name(x.s0), name(x.s1), name(x.s2), name(x.s3)); }