case OP_OR: sel.OR(dst, src0, src1); break;
case OP_AND: sel.AND(dst, src0, src1); break;
case OP_SUB: sel.ADD(dst, src0, GenRegister::negate(src1)); break;
+ case OP_SUBSAT: // saturating subtract: emitted as ADD(dst, src0, -src1) with the saturate field set
+ sel.push(); // presumably snapshots the selector state so the saturate change below stays local to this case
+ sel.curr.saturate = GEN_MATH_SATURATE_SATURATE; // request saturation for what is emitted next
+ sel.ADD(dst, src0, GenRegister::negate(src1)); // same ADD + negate form as OP_SUB; NOTE(review): also reached for unsigned types - confirm negate + saturate is correct there
+ sel.pop(); // presumably restores the state saved by push()
+ break;
case OP_SHL: sel.SHL(dst, src0, src1); break;
case OP_SHR: sel.SHR(dst, src0, src1); break;
case OP_ASR: sel.ASR(dst, src0, src1); break;
DECL_EMIT_FUNCTION(ADD)
DECL_EMIT_FUNCTION(ADDSAT)
DECL_EMIT_FUNCTION(SUB)
+ DECL_EMIT_FUNCTION(SUBSAT)
DECL_EMIT_FUNCTION(DIV)
DECL_EMIT_FUNCTION(REM)
DECL_EMIT_FUNCTION(SHL)
Instruction ADDSAT(Type type, Register dst, Register src0, Register src1);
/*! sub.type dst src0 src1 */
Instruction SUB(Type type, Register dst, Register src0, Register src1);
+ /*! subsat.type dst src0 src1 */
+ Instruction SUBSAT(Type type, Register dst, Register src0, Register src1);
/*! div.type dst src0 src1 */
Instruction DIV(Type type, Register dst, Register src0, Register src1);
/*! rem.type dst src0 src1 */
DECL_INSN(ADD, BinaryInstruction)
DECL_INSN(ADDSAT, BinaryInstruction)
DECL_INSN(SUB, BinaryInstruction)
+DECL_INSN(SUBSAT, BinaryInstruction)
DECL_INSN(DIV, BinaryInstruction)
DECL_INSN(REM, BinaryInstruction)
DECL_INSN(SHL, BinaryInstruction)
case GEN_OCL_UADD_SAT_SHORT:
case GEN_OCL_UADD_SAT_INT:
case GEN_OCL_UADD_SAT_LONG:
+ case GEN_OCL_SSUB_SAT_CHAR:
+ case GEN_OCL_SSUB_SAT_SHORT:
+ case GEN_OCL_SSUB_SAT_INT:
+ case GEN_OCL_SSUB_SAT_LONG:
+ case GEN_OCL_USUB_SAT_CHAR:
+ case GEN_OCL_USUB_SAT_SHORT:
+ case GEN_OCL_USUB_SAT_INT:
+ case GEN_OCL_USUB_SAT_LONG:
this->newRegister(&I);
break;
default:
ctx.ADDSAT(getUnsignedType(ctx, I.getType()), dst, src0, src1);
break;
}
+ case GEN_OCL_SSUB_SAT_CHAR: // signed saturating-subtract intrinsics: all four widths share one lowering
+ case GEN_OCL_SSUB_SAT_SHORT:
+ case GEN_OCL_SSUB_SAT_INT:
+ case GEN_OCL_SSUB_SAT_LONG:
+ {
+ GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; // first operand (minuend); AI/AE presumably iterate the call's arguments
+ GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; // second operand (subtrahend)
+ const ir::Register dst = this->getRegister(&I); // register associated with I itself
+ ctx.SUBSAT(getType(ctx, I.getType()), dst, src0, src1); // type taken via getType(); the USUB cases use getUnsignedType() instead
+ break;
+ }
+ case GEN_OCL_USUB_SAT_CHAR: // unsigned saturating-subtract intrinsics: all four widths share one lowering
+ case GEN_OCL_USUB_SAT_SHORT:
+ case GEN_OCL_USUB_SAT_INT:
+ case GEN_OCL_USUB_SAT_LONG:
+ {
+ GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; // first operand (minuend); AI/AE presumably iterate the call's arguments
+ GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; // second operand (subtrahend)
+ const ir::Register dst = this->getRegister(&I); // register associated with I itself
+ ctx.SUBSAT(getUnsignedType(ctx, I.getType()), dst, src0, src1); // type taken via getUnsignedType(), mirroring the ADDSAT call in the preceding default branch
+ break;
+ }
default: break;
}
}
DECL_LLVM_GEN_FUNCTION(UADD_SAT_SHORT, _Z12ocl_uadd_sattt)
DECL_LLVM_GEN_FUNCTION(UADD_SAT_INT, _Z12ocl_uadd_satjj)
DECL_LLVM_GEN_FUNCTION(UADD_SAT_LONG, _Z12ocl_uadd_satmm)
+
+DECL_LLVM_GEN_FUNCTION(SSUB_SAT_CHAR, _Z12ocl_ssub_satcc) // Itanium-mangled ocl_ssub_sat(char, char)
+DECL_LLVM_GEN_FUNCTION(SSUB_SAT_SHORT, _Z12ocl_ssub_satss) // ocl_ssub_sat(short, short)
+DECL_LLVM_GEN_FUNCTION(SSUB_SAT_INT, _Z12ocl_ssub_satii) // ocl_ssub_sat(int, int)
+DECL_LLVM_GEN_FUNCTION(SSUB_SAT_LONG, _Z12ocl_ssub_satll) // ocl_ssub_sat(long, long)
+DECL_LLVM_GEN_FUNCTION(USUB_SAT_CHAR, _Z12ocl_usub_sathh) // ocl_usub_sat(unsigned char, unsigned char)
+DECL_LLVM_GEN_FUNCTION(USUB_SAT_SHORT, _Z12ocl_usub_sattt) // ocl_usub_sat(unsigned short, unsigned short)
+DECL_LLVM_GEN_FUNCTION(USUB_SAT_INT, _Z12ocl_usub_satjj) // ocl_usub_sat(unsigned int, unsigned int)
+DECL_LLVM_GEN_FUNCTION(USUB_SAT_LONG, _Z12ocl_usub_satmm) // ocl_usub_sat(unsigned long, unsigned long)
#define SDEF(TYPE) \
INLINE_OVERLOADABLE TYPE ocl_sadd_sat(TYPE x, TYPE y); \
-INLINE_OVERLOADABLE TYPE add_sat(TYPE x, TYPE y) { return ocl_sadd_sat(x, y); }
+INLINE_OVERLOADABLE TYPE ocl_ssub_sat(TYPE x, TYPE y); /* declaration only: no body is given in this macro */ \
+INLINE_OVERLOADABLE TYPE add_sat(TYPE x, TYPE y) { return ocl_sadd_sat(x, y); } \
+INLINE_OVERLOADABLE TYPE sub_sat(TYPE x, TYPE y) { return ocl_ssub_sat(x, y); } /* signed sub_sat forwards to ocl_ssub_sat. NOTE(review): SDEF is instantiated for char, short and int only - confirm whether a long overload is intended, since an SSUB_SAT_LONG variant is declared */
SDEF(char);
SDEF(short);
SDEF(int);
#undef SDEF
#define UDEF(TYPE) \
INLINE_OVERLOADABLE TYPE ocl_uadd_sat(TYPE x, TYPE y); \
-INLINE_OVERLOADABLE TYPE add_sat(TYPE x, TYPE y) { return ocl_uadd_sat(x, y); }
+INLINE_OVERLOADABLE TYPE ocl_usub_sat(TYPE x, TYPE y); /* declaration only: no body is given in this macro */ \
+INLINE_OVERLOADABLE TYPE add_sat(TYPE x, TYPE y) { return ocl_uadd_sat(x, y); } \
+INLINE_OVERLOADABLE TYPE sub_sat(TYPE x, TYPE y) { return ocl_usub_sat(x, y); } /* unsigned sub_sat forwards to ocl_usub_sat */
UDEF(uchar);
UDEF(ushort);
UDEF(uint);