From: Homer Hsing Date: Fri, 16 Aug 2013 01:45:17 +0000 (+0800) Subject: add 64bit version of "upsample" X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=731f155f3726017549bfebd7ac8a6054a55f1031;p=contrib%2Fbeignet.git add 64bit version of "upsample" Since simple 64-bit integers are supported, add a 64-bit version of "upsample". To test this patch, in piglit, run bin/cl-program-tester generated_tests/cl/builtin/int/builtin-int-upsample-1.0.generated.cl bin/cl-program-tester generated_tests/cl/builtin/int/builtin-uint-upsample-1.0.generated.cl All piglit test cases pass. Signed-off-by: Homer Hsing Reviewed-by: Zhigang Gong --- diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index ff18c46..1a012fe 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -381,6 +381,25 @@ namespace gbe case SEL_OP_MACH: p->MACH(dst, src0, src1); break; case SEL_OP_UPSAMPLE_SHORT: p->UPSAMPLE_SHORT(dst, src0, src1); break; case SEL_OP_UPSAMPLE_INT: p->UPSAMPLE_INT(dst, src0, src1); break; + case SEL_OP_UPSAMPLE_LONG: + { + GenRegister xdst = GenRegister::retype(dst, GEN_TYPE_UL), + xsrc0 = GenRegister::retype(src0, GEN_TYPE_UL), + xsrc1 = GenRegister::retype(src1, GEN_TYPE_UL); + int execWidth = p->curr.execWidth; + p->push(); + p->curr.execWidth = 8; + for (int nib = 0; nib < execWidth / 4; nib ++) { + p->curr.chooseNib(nib); + p->MOV(xdst.top_half(), xsrc0.bottom_half()); + p->MOV(xdst.bottom_half(), xsrc1.bottom_half()); + xdst = GenRegister::suboffset(xdst, 4); + xsrc0 = GenRegister::suboffset(xsrc0, 4); + xsrc1 = GenRegister::suboffset(xsrc1, 4); + } + p->pop(); + } + break; default: NOT_IMPLEMENTED; } } diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 7d579f8..8e4cd8f 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -457,6 +457,7 @@ namespace gbe ALU2WithTemp(RHADD) 
ALU2(UPSAMPLE_SHORT) ALU2(UPSAMPLE_INT) + ALU2(UPSAMPLE_LONG) ALU1WithTemp(CONVI_TO_I64) I64Shift(I64SHL) I64Shift(I64SHR) @@ -1656,6 +1657,9 @@ namespace gbe case OP_UPSAMPLE_INT: sel.UPSAMPLE_INT(dst, src0, src1); break; + case OP_UPSAMPLE_LONG: + sel.UPSAMPLE_LONG(dst, src0, src1); + break; default: NOT_IMPLEMENTED; } sel.pop(); diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx index cf974af..32c7a05 100644 --- a/backend/src/backend/gen_insn_selection.hxx +++ b/backend/src/backend/gen_insn_selection.hxx @@ -63,4 +63,5 @@ DECL_SELECTION_IR(HADD, BinaryWithTempInstruction) DECL_SELECTION_IR(RHADD, BinaryWithTempInstruction) DECL_SELECTION_IR(UPSAMPLE_SHORT, BinaryInstruction) DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction) +DECL_SELECTION_IR(UPSAMPLE_LONG, BinaryInstruction) DECL_SELECTION_IR(CONVI_TO_I64, UnaryWithTempInstruction) diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index 3b5ff08..48e83b4 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -1335,6 +1335,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType()) DECL_EMIT_FUNCTION(MUL_HI) DECL_EMIT_FUNCTION(UPSAMPLE_SHORT) DECL_EMIT_FUNCTION(UPSAMPLE_INT) + DECL_EMIT_FUNCTION(UPSAMPLE_LONG) DECL_EMIT_FUNCTION(DIV) DECL_EMIT_FUNCTION(REM) DECL_EMIT_FUNCTION(SHL) diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 48e6963..40a3d40 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -523,6 +523,8 @@ namespace ir { Instruction UPSAMPLE_SHORT(Type type, Register dst, Register src0, Register src1); /*! upsample_int.type dst src */ Instruction UPSAMPLE_INT(Type type, Register dst, Register src0, Register src1); + /*! upsample_long.type dst src */ + Instruction UPSAMPLE_LONG(Type type, Register dst, Register src0, Register src1); /*! 
fbh.type dst src */ Instruction FBH(Type type, Register dst, Register src); /*! fbl.type dst src */ diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index b9f0e73..c15e912 100644 --- a/backend/src/ir/instruction.hxx +++ b/backend/src/ir/instruction.hxx @@ -79,3 +79,4 @@ DECL_INSN(HADD, BinaryInstruction) DECL_INSN(RHADD, BinaryInstruction) DECL_INSN(UPSAMPLE_SHORT, BinaryInstruction) DECL_INSN(UPSAMPLE_INT, BinaryInstruction) +DECL_INSN(UPSAMPLE_LONG, BinaryInstruction) diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 18448cf..12d809d 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -1827,6 +1827,7 @@ namespace gbe case GEN_OCL_MUL_HI_UINT: case GEN_OCL_UPSAMPLE_SHORT: case GEN_OCL_UPSAMPLE_INT: + case GEN_OCL_UPSAMPLE_LONG: case GEN_OCL_SADD_SAT_CHAR: case GEN_OCL_SADD_SAT_SHORT: case GEN_OCL_SADD_SAT_INT: @@ -2223,6 +2224,14 @@ namespace gbe ctx.UPSAMPLE_INT(getType(ctx, I.getType()), dst, src0, src1); break; } + case GEN_OCL_UPSAMPLE_LONG: + { + GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; + GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; + const ir::Register dst = this->getRegister(&I); + ctx.UPSAMPLE_LONG(getType(ctx, I.getType()), dst, src0, src1); + break; + } case GEN_OCL_SADD_SAT_CHAR: case GEN_OCL_SADD_SAT_SHORT: case GEN_OCL_SADD_SAT_INT: diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 8e940bc..b712860 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -135,3 +135,4 @@ DECL_LLVM_GEN_FUNCTION(HADD, __gen_ocl_hadd) DECL_LLVM_GEN_FUNCTION(RHADD, __gen_ocl_rhadd) DECL_LLVM_GEN_FUNCTION(UPSAMPLE_SHORT, _Z18__gen_ocl_upsampless) DECL_LLVM_GEN_FUNCTION(UPSAMPLE_INT, _Z18__gen_ocl_upsampleii) +DECL_LLVM_GEN_FUNCTION(UPSAMPLE_LONG, _Z18__gen_ocl_upsamplell) diff 
--git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 00d6fda..c055d02 100644 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -430,15 +430,16 @@ INLINE_OVERLOADABLE ulong rotate(ulong x, ulong y) { OVERLOADABLE short __gen_ocl_upsample(short hi, short lo); OVERLOADABLE int __gen_ocl_upsample(int hi, int lo); +OVERLOADABLE long __gen_ocl_upsample(long hi, long lo); INLINE_OVERLOADABLE short upsample(char hi, uchar lo) { return __gen_ocl_upsample((short)hi, (short)lo); } INLINE_OVERLOADABLE ushort upsample(uchar hi, uchar lo) { return __gen_ocl_upsample((short)hi, (short)lo); } INLINE_OVERLOADABLE int upsample(short hi, ushort lo) { return __gen_ocl_upsample((int)hi, (int)lo); } INLINE_OVERLOADABLE uint upsample(ushort hi, ushort lo) { return __gen_ocl_upsample((int)hi, (int)lo); } INLINE_OVERLOADABLE long upsample(int hi, uint lo) { - return 0; + return __gen_ocl_upsample((long)hi, (long)lo); } INLINE_OVERLOADABLE ulong upsample(uint hi, uint lo) { - return 0; + return __gen_ocl_upsample((long)hi, (long)lo); } PURE CONST uint __gen_ocl_hadd(uint x, uint y);