From be73d25fc4bd3e68b94a37e524f7edf4aca53ce3 Mon Sep 17 00:00:00 2001 From: Guo Yejun Date: Fri, 18 Apr 2014 13:42:16 +0800 Subject: [PATCH] support __gen_ocl_simd_any and __gen_ocl_simd_all short __gen_ocl_simd_any(short x): if x is nonzero in any of the active threads of the same SIMD, the return value for all of these threads is nonzero; otherwise, zero is returned. short __gen_ocl_simd_all(short x): only if x is nonzero in all of the active threads of the same SIMD is the return value for all of these threads nonzero; otherwise, zero is returned. For example: to check whether a specific value exists in a global buffer, use one SIMD to do the search in parallel; the whole SIMD can stop the task once the value is found. The key kernel code looks like: for(; ; ) { ... if (__gen_ocl_simd_any(...)) break; //the whole SIMD stops searching } Signed-off-by: Guo Yejun Reviewed-by: Zhigang Gong --- backend/src/backend/gen_insn_selection.cpp | 61 ++++++++++++++++++++++++++++++ backend/src/ir/instruction.hpp | 4 ++ backend/src/ir/instruction.hxx | 2 + backend/src/llvm/llvm_gen_backend.cpp | 16 ++++++++ backend/src/llvm/llvm_gen_ocl_function.hxx | 4 ++ backend/src/ocl_stdlib.tmpl.h | 8 ++++ 6 files changed, 95 insertions(+) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 75a6875..bcbf115 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -1730,6 +1730,67 @@ namespace gbe case ir::OP_SQR: sel.MATH(dst, GEN_MATH_FUNCTION_SQRT, src); break; case ir::OP_RSQ: sel.MATH(dst, GEN_MATH_FUNCTION_RSQ, src); break; case ir::OP_RCP: sel.MATH(dst, GEN_MATH_FUNCTION_INV, src); break; + case ir::OP_SIMD_ANY: + { + const GenRegister constZero = GenRegister::immuw(0);; + const GenRegister regOne = GenRegister::uw1grf(ir::ocl::one); + const GenRegister flag01 = GenRegister::flag(0, 1); + + sel.push(); + int simdWidth = sel.curr.execWidth; + sel.curr.predicate = GEN_PREDICATE_NONE; +
sel.curr.execWidth = 1; + sel.curr.noMask = 1; + sel.MOV(flag01, constZero); + + sel.curr.execWidth = simdWidth; + sel.curr.noMask = 0; + + sel.curr.flag = 0; + sel.curr.subFlag = 1; + sel.CMP(GEN_CONDITIONAL_NEQ, src, constZero); + + if (sel.curr.execWidth == 16) + sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H; + else if (sel.curr.execWidth == 8) + sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H; + else + NOT_IMPLEMENTED; + sel.SEL(dst, regOne, constZero); + sel.pop(); + } + break; + case ir::OP_SIMD_ALL: + { + const GenRegister constZero = GenRegister::immuw(0); + const GenRegister regOne = GenRegister::uw1grf(ir::ocl::one); + const GenRegister flag01 = GenRegister::flag(0, 1); + + sel.push(); + int simdWidth = sel.curr.execWidth; + sel.curr.predicate = GEN_PREDICATE_NONE; + sel.curr.execWidth = 1; + sel.curr.noMask = 1; + sel.MOV(flag01, regOne); + + sel.curr.execWidth = simdWidth; + sel.curr.noMask = 0; + + sel.curr.flag = 0; + sel.curr.subFlag = 1; + sel.CMP(GEN_CONDITIONAL_NEQ, src, constZero); + + if (sel.curr.execWidth == 16) + sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL16H; + else if (sel.curr.execWidth == 8) + sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H; + else + NOT_IMPLEMENTED; + sel.SEL(dst, regOne, constZero); + sel.pop(); + } + break; + default: NOT_SUPPORTED; } return true; diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 457b5b4..582e22d 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -567,6 +567,10 @@ namespace ir { Instruction RCP(Type type, Register dst, Register src); /*! abs.type dst src */ Instruction ABS(Type type, Register dst, Register src); + /*! simd_all.type dst src */ + Instruction SIMD_ALL(Type type, Register dst, Register src); + /*! simd_any.type dst src */ + Instruction SIMD_ANY(Type type, Register dst, Register src); /*! log.type dst src */ Instruction LOG(Type type, Register dst, Register src); /*! 
exp.type dst src */ diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index bebceff..587517b 100644 --- a/backend/src/ir/instruction.hxx +++ b/backend/src/ir/instruction.hxx @@ -38,6 +38,8 @@ DECL_INSN(RNDD, UnaryInstruction) DECL_INSN(RNDE, UnaryInstruction) DECL_INSN(RNDU, UnaryInstruction) DECL_INSN(RNDZ, UnaryInstruction) +DECL_INSN(SIMD_ANY, UnaryInstruction) +DECL_INSN(SIMD_ALL, UnaryInstruction) DECL_INSN(POW, BinaryInstruction) DECL_INSN(MUL, BinaryInstruction) DECL_INSN(ADD, BinaryInstruction) diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index b46e991..6c2b45d 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -2282,6 +2282,8 @@ namespace gbe case GEN_OCL_SAT_CONV_F32_TO_U32: case GEN_OCL_CONV_F16_TO_F32: case GEN_OCL_CONV_F32_TO_F16: + case GEN_OCL_SIMD_ANY: + case GEN_OCL_SIMD_ALL: this->newRegister(&I); break; default: @@ -2422,6 +2424,20 @@ namespace gbe ctx.ALU1(ir::OP_ABS, ir::TYPE_S32, dst, src); break; } + case GEN_OCL_SIMD_ALL: + { + const ir::Register src = this->getRegister(*AI); + const ir::Register dst = this->getRegister(&I); + ctx.ALU1(ir::OP_SIMD_ALL, ir::TYPE_S16, dst, src); + break; + } + case GEN_OCL_SIMD_ANY: + { + const ir::Register src = this->getRegister(*AI); + const ir::Register dst = this->getRegister(&I); + ctx.ALU1(ir::OP_SIMD_ANY, ir::TYPE_S16, dst, src); + break; + } case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break; case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break; case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break; diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 7058a60..4236298 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -175,3 +175,7 @@ DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U32, _Z16convert_uint_satf) 
DECL_LLVM_GEN_FUNCTION(CONV_F16_TO_F32, __gen_ocl_f16to32) DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16) + +// SIMD level function for internal usage +DECL_LLVM_GEN_FUNCTION(SIMD_ANY, __gen_ocl_simd_any) +DECL_LLVM_GEN_FUNCTION(SIMD_ALL, __gen_ocl_simd_all) diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 22e3aec..cd8b918 100755 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -638,6 +638,14 @@ INLINE_OVERLOADABLE ulong abs_diff (ulong x, ulong y) { return y > x ? (y - x) : (x - y); } + +///////////////////////////////////////////////////////////////////////////// +// SIMD level function +///////////////////////////////////////////////////////////////////////////// +short __gen_ocl_simd_any(short); +short __gen_ocl_simd_all(short); + + ///////////////////////////////////////////////////////////////////////////// // Work Items functions (see 6.11.1 of OCL 1.1 spec) ///////////////////////////////////////////////////////////////////////////// -- 2.7.4