From 447cb61f8d19e702e04182e4ddac754e9fd05ab1 Mon Sep 17 00:00:00 2001 From: Ruiling Song Date: Tue, 5 Nov 2013 16:37:13 +0800 Subject: [PATCH] GBE: use ISA mad for mad() builtin function. Directly map mad() to the ISA mad instruction, so that mad() has better performance and less precision loss. Signed-off-by: Ruiling Song Reviewed-by: Zhigang Gong --- backend/src/backend/gen_insn_selection.cpp | 5 +++++ backend/src/ir/context.hpp | 1 + backend/src/ir/instruction.cpp | 3 +++ backend/src/ir/instruction.hpp | 2 ++ backend/src/ir/instruction.hxx | 1 + backend/src/llvm/llvm_gen_backend.cpp | 9 +++++++++ backend/src/llvm/llvm_gen_ocl_function.hxx | 1 + backend/src/ocl_stdlib.tmpl.h | 3 ++- 8 files changed, 24 insertions(+), 1 deletion(-) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 7eae7ca..214c2c2 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -2704,6 +2704,11 @@ namespace gbe sel.I64MADSAT(dst, src0, src1, src2, tmp); break; } + case OP_MAD: + { + sel.MAD(dst, src2, src0, src1); + break; + } default: NOT_IMPLEMENTED; } diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp index 7bb6e5a..242beaa 100644 --- a/backend/src/ir/context.hpp +++ b/backend/src/ir/context.hpp @@ -149,6 +149,7 @@ namespace ir { } DECL_THREE_SRC_INSN(SEL); DECL_THREE_SRC_INSN(I64MADSAT); + DECL_THREE_SRC_INSN(MAD); #undef DECL_THREE_SRC_INSN /*! 
For all unary functions */ diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index 9b3e699..d86c3c0 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -1449,6 +1449,9 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType()) return internal::TernaryInstruction(OP_I64MADSAT, type, dst, src).convert(); } + Instruction MAD(Type type, Register dst, Tuple src) { + return internal::TernaryInstruction(OP_MAD, type, dst, src).convert(); + } // All compare functions #define DECL_EMIT_FUNCTION(NAME) \ Instruction NAME(Type type, Register dst, Register src0, Register src1) { \ diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 90c819b..ae45a63 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -539,6 +539,8 @@ namespace ir { Instruction I64_MUL_HI(Type type, Register dst, Register src0, Register src1); /*! i64madsat.type dst src */ Instruction I64MADSAT(Type type, Register dst, Tuple src); + /*! mad.type dst src */ + Instruction MAD(Type type, Register dst, Tuple src); /*! upsample_short.type dst src */ Instruction UPSAMPLE_SHORT(Type type, Register dst, Register src0, Register src1); /*! 
upsample_int.type dst src */ diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index cd60349..67dc682 100644 --- a/backend/src/ir/instruction.hxx +++ b/backend/src/ir/instruction.hxx @@ -86,3 +86,4 @@ DECL_INSN(UPSAMPLE_SHORT, BinaryInstruction) DECL_INSN(UPSAMPLE_INT, BinaryInstruction) DECL_INSN(UPSAMPLE_LONG, BinaryInstruction) DECL_INSN(I64MADSAT, TernaryInstruction) +DECL_INSN(MAD, TernaryInstruction) diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index b824bf9..aae52d8 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -2016,6 +2016,7 @@ namespace gbe case GEN_OCL_UPSAMPLE_SHORT: case GEN_OCL_UPSAMPLE_INT: case GEN_OCL_UPSAMPLE_LONG: + case GEN_OCL_MAD: case GEN_OCL_SADD_SAT_CHAR: case GEN_OCL_SADD_SAT_SHORT: case GEN_OCL_SADD_SAT_INT: @@ -2533,6 +2534,14 @@ namespace gbe ctx.I64MADSAT(getUnsignedType(ctx, I.getType()), dst, src0, src1, src2); break; } + case GEN_OCL_MAD: { + GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; + GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; + GBE_ASSERT(AI != AE); const ir::Register src2 = this->getRegister(*AI); ++AI; + const ir::Register dst = this->getRegister(&I); + ctx.MAD(getType(ctx, I.getType()), dst, src0, src1, src2); + break; + } case GEN_OCL_HADD: { GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 3f44be8..71034ab 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -31,6 +31,7 @@ DECL_LLVM_GEN_FUNCTION(RNDZ, __gen_ocl_rndz) DECL_LLVM_GEN_FUNCTION(RNDE, __gen_ocl_rnde) DECL_LLVM_GEN_FUNCTION(RNDU, __gen_ocl_rndu) DECL_LLVM_GEN_FUNCTION(RNDD, __gen_ocl_rndd) 
+DECL_LLVM_GEN_FUNCTION(MAD, __gen_ocl_mad) // Barrier function DECL_LLVM_GEN_FUNCTION(LBARRIER, __gen_ocl_barrier_local) diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index a1f365c..09c36d5 100644 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -1558,8 +1558,9 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_rint(float x) { #define erf __gen_ocl_internal_erf #define erfc __gen_ocl_internal_erfc +PURE CONST float __gen_ocl_mad(float a, float b, float c); INLINE_OVERLOADABLE float mad(float a, float b, float c) { - return a*b+c; + return __gen_ocl_mad(a, b, c); } #define DEF(TYPE1, TYPE2) \ -- 2.7.4