From 9c7270b40c1feec46583e349ccf9637d8c6471b5 Mon Sep 17 00:00:00 2001
From: Lu Guanqun
Date: Thu, 17 Jan 2013 14:37:08 +0800
Subject: [PATCH] add add_sat operation

Signed-off-by: Lu Guanqun
Reviewed-by: Zhigang Gong
---
Reviewer notes (free text between "---" and the diffstat is not part of the
commit and is skipped when the patch is applied):
- ir: adds a binary opcode ADDSAT, declared next to ADD and listed in
  commutes().
- instruction selection: OP_ADDSAT emits the regular ADD while
  sel.curr.saturate is set to GEN_MATH_SATURATE_SATURATE.
- LLVM backend: calls to the ocl_sadd_sat / ocl_uadd_sat overloads get a
  result register and are lowered to ctx.ADDSAT(); the unsigned overloads take
  their IR type from the new getUnsignedType() helper.
- ocl_stdlib.h: add_sat() overloads for char/short/int/long and
  uchar/ushort/uint/ulong forward to those two builtins. Only scalar
  overloads are added by this patch.

 backend/src/backend/gen_insn_selection.cpp |  6 ++++
 backend/src/ir/instruction.cpp             |  2 ++
 backend/src/ir/instruction.hpp             |  2 ++
 backend/src/ir/instruction.hxx             |  1 +
 backend/src/llvm/llvm_gen_backend.cpp      | 50 ++++++++++++++++++++++++++++++
 backend/src/llvm/llvm_gen_ocl_function.hxx | 10 ++++++
 backend/src/ocl_stdlib.h                   | 17 ++++++++++
 7 files changed, 88 insertions(+)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index db02dc1..c34558a 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -1203,6 +1203,12 @@ namespace gbe
       // Output the binary instruction
       switch (opcode) {
         case OP_ADD: sel.ADD(dst, src0, src1); break;
+        case OP_ADDSAT: // saturating add: the regular ADD, emitted while curr.saturate is set
+          sel.push(); // NOTE(review): push()/pop() presumably save and restore sel.curr so the flag does not leak - confirm
+            sel.curr.saturate = GEN_MATH_SATURATE_SATURATE;
+            sel.ADD(dst, src0, src1);
+          sel.pop();
+          break;
         case OP_XOR: sel.XOR(dst, src0, src1); break;
         case OP_OR: sel.OR(dst, src0, src1); break;
         case OP_AND: sel.AND(dst, src0, src1); break;
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 9a9ce97..74f4797 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -161,6 +161,7 @@ namespace ir {
     INLINE bool commutes(void) const {
       switch (opcode) {
         case OP_ADD:
+        case OP_ADDSAT: // operand order does not matter for a saturating add either
         case OP_XOR:
         case OP_OR:
         case OP_AND:
@@ -1217,6 +1218,7 @@ DECL_MEM_FN(TypedWriteInstruction, Type, getCoordType(void), getCoordType())
   DECL_EMIT_FUNCTION(POW)
   DECL_EMIT_FUNCTION(MUL)
   DECL_EMIT_FUNCTION(ADD)
+  DECL_EMIT_FUNCTION(ADDSAT)
   DECL_EMIT_FUNCTION(SUB)
   DECL_EMIT_FUNCTION(DIV)
   DECL_EMIT_FUNCTION(REM)
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 1c859ab..df14bdd 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -461,6 +461,8 @@ namespace ir {
   Instruction MUL(Type type, Register dst, Register src0, Register src1);
   /*! add.type dst src0 src1 */
   Instruction ADD(Type type, Register dst, Register src0, Register src1);
+  /*! addsat.type dst src0 src1 (saturating add; same operand layout as add) */
+  Instruction ADDSAT(Type type, Register dst, Register src0, Register src1);
   /*! sub.type dst src0 src1 */
   Instruction SUB(Type type, Register dst, Register src0, Register src1);
   /*! div.type dst src0 src1 */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 6aedc1f..b469840 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -40,6 +40,7 @@ DECL_INSN(RNDZ, UnaryInstruction)
 DECL_INSN(POW, BinaryInstruction)
 DECL_INSN(MUL, BinaryInstruction)
 DECL_INSN(ADD, BinaryInstruction)
+DECL_INSN(ADDSAT, BinaryInstruction)
 DECL_INSN(SUB, BinaryInstruction)
 DECL_INSN(DIV, BinaryInstruction)
 DECL_INSN(REM, BinaryInstruction)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index e35804e..14240cd 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -184,6 +184,24 @@ namespace gbe
     return ir::TYPE_S64;
   }
 
+  /*! LLVM IR Type to Gen IR unsigned type translation: i1 gives TYPE_BOOL, i8/i16/i32/i64 give TYPE_U8/U16/U32/U64. ctx is not used by the body */
+  static ir::Type getUnsignedType(const ir::Context &ctx, const Type *type)
+  {
+    GBE_ASSERT(type->isIntegerTy() == true); // only integer types are accepted
+    if (type == Type::getInt1Ty(type->getContext()))
+      return ir::TYPE_BOOL;
+    if (type == Type::getInt8Ty(type->getContext()))
+      return ir::TYPE_U8;
+    if (type == Type::getInt16Ty(type->getContext()))
+      return ir::TYPE_U16;
+    if (type == Type::getInt32Ty(type->getContext()))
+      return ir::TYPE_U32;
+    if (type == Type::getInt64Ty(type->getContext()))
+      return ir::TYPE_U64;
+    GBE_ASSERT(0); // integer width not covered by the checks above
+    return ir::TYPE_U64; // NOTE(review): presumably only reached when GBE_ASSERT is compiled out - confirm
+  }
+
   /*! Type to register family translation */
   static ir::RegisterFamily getFamily(const ir::Context &ctx, const Type *type)
   {
@@ -1719,6 +1737,16 @@ namespace gbe
         this->newRegister(&I);
         break;
       }
+      case GEN_OCL_SADD_SAT_CHAR:
+      case GEN_OCL_SADD_SAT_SHORT:
+      case GEN_OCL_SADD_SAT_INT:
+      case GEN_OCL_SADD_SAT_LONG:
+      case GEN_OCL_UADD_SAT_CHAR:
+      case GEN_OCL_UADD_SAT_SHORT:
+      case GEN_OCL_UADD_SAT_INT:
+      case GEN_OCL_UADD_SAT_LONG:
+        this->newRegister(&I); // register for the call result; the emit switch below reads it back as dst via getRegister(&I)
+        break;
       default:
         GBE_ASSERTM(false, "Function call are not supported yet");
     };
@@ -1923,6 +1951,28 @@ namespace gbe
           ctx.TYPED_WRITE(srcTuple, srcType, coordType);
           break;
         }
+        case GEN_OCL_SADD_SAT_CHAR:
+        case GEN_OCL_SADD_SAT_SHORT:
+        case GEN_OCL_SADD_SAT_INT:
+        case GEN_OCL_SADD_SAT_LONG:
+        {
+          GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+          GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+          const ir::Register dst = this->getRegister(&I);
+          ctx.ADDSAT(getType(ctx, I.getType()), dst, src0, src1); // NOTE(review): getType() presumably yields the signed TYPE_S* here; only its TYPE_S64 fallback is visible in this patch
+          break;
+        }
+        case GEN_OCL_UADD_SAT_CHAR:
+        case GEN_OCL_UADD_SAT_SHORT:
+        case GEN_OCL_UADD_SAT_INT:
+        case GEN_OCL_UADD_SAT_LONG:
+        {
+          GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+          GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+          const ir::Register dst = this->getRegister(&I);
+          ctx.ADDSAT(getUnsignedType(ctx, I.getType()), dst, src0, src1); // identical to the signed case except that the type comes from getUnsignedType()
+          break;
+        }
         default: break;
       }
     }
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index e02796e..3a0e051 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -55,3 +55,13 @@ DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE4, _Z22__gen_ocl_write_imagefjiiDv4_f)
 DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE5, _Z22__gen_ocl_write_imagefjffDv4_f)
 DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE2, _Z23__gen_ocl_write_imageuijiiDv4_j)
 DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE3, _Z23__gen_ocl_write_imageuijffDv4_j)
+
+// saturation related functions: symbol names of the ocl_sadd_sat / ocl_uadd_sat overloads declared in ocl_stdlib.h.
+DECL_LLVM_GEN_FUNCTION(SADD_SAT_CHAR, _Z12ocl_sadd_satcc)
+DECL_LLVM_GEN_FUNCTION(SADD_SAT_SHORT, _Z12ocl_sadd_satss)
+DECL_LLVM_GEN_FUNCTION(SADD_SAT_INT, _Z12ocl_sadd_satii)
+DECL_LLVM_GEN_FUNCTION(SADD_SAT_LONG, _Z12ocl_sadd_satll)
+DECL_LLVM_GEN_FUNCTION(UADD_SAT_CHAR, _Z12ocl_uadd_sathh)
+DECL_LLVM_GEN_FUNCTION(UADD_SAT_SHORT, _Z12ocl_uadd_sattt)
+DECL_LLVM_GEN_FUNCTION(UADD_SAT_INT, _Z12ocl_uadd_satjj)
+DECL_LLVM_GEN_FUNCTION(UADD_SAT_LONG, _Z12ocl_uadd_satmm)
diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
index 9518bcb..b6a4d30 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -202,6 +202,23 @@ DEF;
 #undef DECL
 #undef DEF
 
+#define SDEF(TYPE) \
+INLINE_OVERLOADABLE TYPE ocl_sadd_sat(TYPE x, TYPE y); \
+INLINE_OVERLOADABLE TYPE add_sat(TYPE x, TYPE y) { return ocl_sadd_sat(x, y); }
+SDEF(char); // each SDEF(T) declares ocl_sadd_sat(T, T) and defines add_sat(T, T) forwarding to it
+SDEF(short);
+SDEF(int);
+SDEF(long);
+#undef SDEF
+#define UDEF(TYPE) \
+INLINE_OVERLOADABLE TYPE ocl_uadd_sat(TYPE x, TYPE y); \
+INLINE_OVERLOADABLE TYPE add_sat(TYPE x, TYPE y) { return ocl_uadd_sat(x, y); }
+UDEF(uchar); // unsigned counterpart: add_sat(T, T) forwards to ocl_uadd_sat(T, T)
+UDEF(ushort);
+UDEF(uint);
+UDEF(ulong);
+#undef UDEF
+
 #define DEC2(name) INLINE_OVERLOADABLE int2 name(float2 x) { return (name(x.s0), name(x.s1)); }
 #define DEC3(name) INLINE_OVERLOADABLE int3 name(float3 x) { return (name(x.s0), name(x.s1), name(x.s2)); }
 #define DEC4(name) INLINE_OVERLOADABLE int4 name(float4 x) { return (name(x.s0), name(x.s1), name(x.s2), name(x.s3)); }
-- 
2.7.4