From 7d8897fdb0d4d6efd279cd08a47f239695a92814 Mon Sep 17 00:00:00 2001 From: Yang Rong Date: Mon, 24 Mar 2014 16:27:31 +0800 Subject: [PATCH] Refined the fmax and fmin builtins. GEN's select instruction with cmod .l and .ge handles the NaN case, so use the compare and select instructions in the Gen IR for fmax and fmin. They will be optimized into one sel_cmp, so there is no need to check isnan. Signed-off-by: Yang Rong Reviewed-by: "Zou, Nanhai" Reviewed-by: Zhigang Gong --- backend/src/llvm/llvm_gen_backend.cpp | 18 ++++++++++++++++++ backend/src/llvm/llvm_gen_ocl_function.hxx | 2 ++ backend/src/ocl_stdlib.tmpl.h | 10 ++++------ 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 227ef09..1090f97 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -2135,6 +2135,8 @@ namespace gbe case GEN_OCL_UPSAMPLE_INT: case GEN_OCL_UPSAMPLE_LONG: case GEN_OCL_MAD: + case GEN_OCL_FMAX: + case GEN_OCL_FMIN: case GEN_OCL_SADD_SAT_CHAR: case GEN_OCL_SADD_SAT_SHORT: case GEN_OCL_SADD_SAT_INT: @@ -2623,6 +2625,22 @@ namespace gbe ctx.MAD(getType(ctx, I.getType()), dst, src0, src1, src2); break; } + case GEN_OCL_FMAX: + case GEN_OCL_FMIN:{ + GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; + GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; + const ir::Register dst = this->getRegister(&I); + const ir::Register cmp = ctx.reg(ir::FAMILY_BOOL); + //Because cmp's sources are the same as sel's sources, the cmp instruction and sel + //instruction will be merged into one sel_cmp instruction in the gen selection. + //Add two instructions here for simplicity. 
+ if(it->second == GEN_OCL_FMAX) + ctx.GE(getType(ctx, I.getType()), cmp, src0, src1); + else + ctx.LT(getType(ctx, I.getType()), cmp, src0, src1); + ctx.SEL(getType(ctx, I.getType()), dst, cmp, src0, src1); + break; + } case GEN_OCL_HADD: { GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 00d69f0..5bf794a 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -33,6 +33,8 @@ DECL_LLVM_GEN_FUNCTION(RNDE, __gen_ocl_rnde) DECL_LLVM_GEN_FUNCTION(RNDU, __gen_ocl_rndu) DECL_LLVM_GEN_FUNCTION(RNDD, __gen_ocl_rndd) DECL_LLVM_GEN_FUNCTION(MAD, __gen_ocl_mad) +DECL_LLVM_GEN_FUNCTION(FMAX, __gen_ocl_fmax) +DECL_LLVM_GEN_FUNCTION(FMIN, __gen_ocl_fmin) // Barrier function DECL_LLVM_GEN_FUNCTION(LBARRIER, __gen_ocl_barrier_local) diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 25f2ff7..50107d8 100755 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -3169,6 +3169,8 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_exp10(float x){ #define remainder __gen_ocl_internal_remainder #define ldexp __gen_ocl_internal_ldexp PURE CONST float __gen_ocl_mad(float a, float b, float c); +PURE CONST float __gen_ocl_fmax(float a, float b); +PURE CONST float __gen_ocl_fmin(float a, float b); INLINE_OVERLOADABLE float mad(float a, float b, float c) { return __gen_ocl_mad(a, b, c); } @@ -3224,14 +3226,10 @@ DECL_MIN_MAX_CLAMP(long) DECL_MIN_MAX_CLAMP(ulong) #undef DECL_MIN_MAX_CLAMP INLINE_OVERLOADABLE float max(float a, float b) { - if(isnan(b)) - return a; - return a > b ? a : b; + return __gen_ocl_fmax(a, b); } INLINE_OVERLOADABLE float min(float a, float b) { - if(isnan(b)) - return a; - return a < b ? 
a : b; + return __gen_ocl_fmin(a, b); } INLINE_OVERLOADABLE float clamp(float v, float l, float u) { return max(min(v, u), l); -- 2.7.4