case GEN_OCL_UPSAMPLE_INT:
case GEN_OCL_UPSAMPLE_LONG:
case GEN_OCL_MAD:
+ case GEN_OCL_FMAX:
+ case GEN_OCL_FMIN:
case GEN_OCL_SADD_SAT_CHAR:
case GEN_OCL_SADD_SAT_SHORT:
case GEN_OCL_SADD_SAT_INT:
ctx.MAD(getType(ctx, I.getType()), dst, src0, src1, src2);
break;
}
+ // Lowering of the float max/min built-ins: read the two call operands,
+ // emit a comparison into a boolean register, then select between the two
+ // operands based on that comparison.
+ case GEN_OCL_FMAX:
+ case GEN_OCL_FMIN:{
+ GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+ GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+ const ir::Register dst = this->getRegister(&I);
+ // Register of the boolean family that carries the comparison result.
+ const ir::Register cmp = ctx.reg(ir::FAMILY_BOOL);
+ // Because the compare and the select use the same sources, the two
+ // instructions are expected to be merged into a single sel_cmp instruction
+ // by the Gen instruction selection. Two separate instructions are emitted
+ // here for simplicity.
+ // NOTE(review): the result when an operand is NaN depends on the GE/LT/SEL
+ // semantics, which are not visible here - confirm it matches what callers
+ // of max()/min() expect.
+ if(it->second == GEN_OCL_FMAX)
+ ctx.GE(getType(ctx, I.getType()), cmp, src0, src1);
+ else
+ ctx.LT(getType(ctx, I.getType()), cmp, src0, src1);
+ ctx.SEL(getType(ctx, I.getType()), dst, cmp, src0, src1);
+ break;
+ }
case GEN_OCL_HADD: {
GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
DECL_LLVM_GEN_FUNCTION(RNDU, __gen_ocl_rndu)
DECL_LLVM_GEN_FUNCTION(RNDD, __gen_ocl_rndd)
DECL_LLVM_GEN_FUNCTION(MAD, __gen_ocl_mad)
+// Float max/min functions
+DECL_LLVM_GEN_FUNCTION(FMAX, __gen_ocl_fmax)
+DECL_LLVM_GEN_FUNCTION(FMIN, __gen_ocl_fmin)
// Barrier function
DECL_LLVM_GEN_FUNCTION(LBARRIER, __gen_ocl_barrier_local)
#define remainder __gen_ocl_internal_remainder
#define ldexp __gen_ocl_internal_ldexp
PURE CONST float __gen_ocl_mad(float a, float b, float c);
+// Two-operand float max/min built-ins, declared here without a body.
+// NOTE(review): presumably resolved by the compiler back end - confirm.
+PURE CONST float __gen_ocl_fmax(float a, float b);
+PURE CONST float __gen_ocl_fmin(float a, float b);
+// mad(float, float, float): forwards its three operands, in order, to the
+// __gen_ocl_mad built-in and returns its result.
INLINE_OVERLOADABLE float mad(float a, float b, float c) {
return __gen_ocl_mad(a, b, c);
}
DECL_MIN_MAX_CLAMP(ulong)
#undef DECL_MIN_MAX_CLAMP
+// max(float, float): forwards both operands to the __gen_ocl_fmax built-in.
+// The inline version being removed returned a when b was NaN and otherwise
+// evaluated a > b ? a : b.
+// NOTE(review): confirm the built-in keeps that NaN behaviour.
INLINE_OVERLOADABLE float max(float a, float b) {
- if(isnan(b))
- return a;
- return a > b ? a : b;
+ return __gen_ocl_fmax(a, b);
}
+// min(float, float): forwards both operands to the __gen_ocl_fmin built-in.
+// The inline version being removed returned a when b was NaN and otherwise
+// evaluated a < b ? a : b.
+// NOTE(review): confirm the built-in keeps that NaN behaviour.
INLINE_OVERLOADABLE float min(float a, float b) {
- if(isnan(b))
- return a;
- return a < b ? a : b;
+ return __gen_ocl_fmin(a, b);
}
INLINE_OVERLOADABLE float clamp(float v, float l, float u) {
return max(min(v, u), l);