From 2c6bc5813bcb88dba4e1fd42455e718a96937093 Mon Sep 17 00:00:00 2001 From: Homer Hsing Date: Wed, 11 Sep 2013 11:21:37 +0800 Subject: [PATCH] add 64-bit version of "rhadd" v2: keep highest carry bit tested by piglit test cases: piglit/framework/../bin/cl-program-tester generated_tests/cl/builtin/int/builtin-ulong-rhadd-1.0.generated.cl piglit/framework/../bin/cl-program-tester generated_tests/cl/builtin/int/builtin-long-rhadd-1.0.generated.cl Signed-off-by: Homer Hsing Reviewed-by: "Song, Ruiling" --- backend/src/backend/gen_context.cpp | 30 ++++++++++++++++++++++ backend/src/backend/gen_context.hpp | 1 + .../src/backend/gen_insn_gen7_schedule_info.hxx | 1 + backend/src/backend/gen_insn_selection.cpp | 19 ++++++++++++++ backend/src/backend/gen_insn_selection.hxx | 1 + backend/src/ir/instruction.cpp | 1 + backend/src/ir/instruction.hpp | 2 ++ backend/src/ir/instruction.hxx | 1 + backend/src/llvm/llvm_gen_backend.cpp | 11 ++++++++ backend/src/llvm/llvm_gen_ocl_function.hxx | 3 ++- backend/src/ocl_stdlib.tmpl.h | 15 ++++++++--- 11 files changed, 80 insertions(+), 5 deletions(-) diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index a1df963..da0b219 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -471,6 +471,36 @@ namespace gbe storeTopHalf(dest, c); } + void GenContext::emitI64RHADDInstruction(const SelectionInstruction &insn) { + GenRegister dest = ra->genReg(insn.dst(0)); + GenRegister x = ra->genReg(insn.src(0)); + GenRegister y = ra->genReg(insn.src(1)); + GenRegister a = ra->genReg(insn.dst(1)); + GenRegister b = ra->genReg(insn.dst(2)); + GenRegister c = ra->genReg(insn.dst(3)); + GenRegister d = ra->genReg(insn.dst(4)); + a.type = b.type = c.type = d.type = GEN_TYPE_UD; + loadBottomHalf(a, x); + loadBottomHalf(b, y); + addWithCarry(a, a, b); + p->MOV(c, GenRegister::immud(1)); + addWithCarry(a, a, c); + p->ADD(b, b, c); + loadTopHalf(c, x); + loadTopHalf(d, y); + 
addWithCarry(c, c, b); + addWithCarry(c, c, d); + p->ADD(b, b, d); + p->SHR(a, a, GenRegister::immud(1)); + p->SHL(d, c, GenRegister::immud(31)); + p->OR(a, a, d); + p->SHR(c, c, GenRegister::immud(1)); + p->SHL(d, b, GenRegister::immud(31)); + p->OR(c, c, d); + storeBottomHalf(dest, a); + storeTopHalf(dest, c); + } + void GenContext::emitI64ShiftInstruction(const SelectionInstruction &insn) { GenRegister dest = ra->genReg(insn.dst(0)); GenRegister x = ra->genReg(insn.src(0)); diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp index 6b37276..1de0b3d 100644 --- a/backend/src/backend/gen_context.hpp +++ b/backend/src/backend/gen_context.hpp @@ -98,6 +98,7 @@ namespace gbe void emitBinaryWithTempInstruction(const SelectionInstruction &insn); void emitTernaryInstruction(const SelectionInstruction &insn); void emitI64HADDInstruction(const SelectionInstruction &insn); + void emitI64RHADDInstruction(const SelectionInstruction &insn); void emitI64ShiftInstruction(const SelectionInstruction &insn); void emitI64CompareInstruction(const SelectionInstruction &insn); void emitI64ToFloatInstruction(const SelectionInstruction &insn); diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx index 49b3170..a4ba90b 100644 --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx @@ -7,6 +7,7 @@ DECL_GEN7_SCHEDULE(BinaryWithTemp, 20, 4, 2) DECL_GEN7_SCHEDULE(Ternary, 20, 4, 2) DECL_GEN7_SCHEDULE(I64Shift, 20, 4, 2) DECL_GEN7_SCHEDULE(I64HADD, 20, 4, 2) +DECL_GEN7_SCHEDULE(I64RHADD, 20, 4, 2) DECL_GEN7_SCHEDULE(I64ToFloat, 20, 4, 2) DECL_GEN7_SCHEDULE(Compare, 20, 4, 2) DECL_GEN7_SCHEDULE(I64Compare, 20, 4, 2) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 241164b..b0b48ba 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ 
b/backend/src/backend/gen_insn_selection.cpp @@ -473,6 +473,8 @@ namespace gbe void CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[3]); /*! (x+y)>>1 without mod. overflow */ void I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]); + /*! (x+y+1)>>1 without mod. overflow */ + void I64RHADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]); /*! Shift a 64-bit integer */ void I64Shift(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, GenRegister tmp[6]); /*! Compare 64-bit integer */ @@ -1094,6 +1096,15 @@ namespace gbe insn->dst(i + 1) = tmp[i]; } + void Selection::Opaque::I64RHADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]) { + SelectionInstruction *insn = this->appendInsn(SEL_OP_I64RHADD, 5, 2); + insn->dst(0) = dst; + insn->src(0) = src0; + insn->src(1) = src1; + for(int i = 0; i < 4; i ++) + insn->dst(i + 1) = tmp[i]; + } + void Selection::Opaque::I64Shift(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) { SelectionInstruction *insn = this->appendInsn(opcode, 7, 2); insn->dst(0) = dst; @@ -1696,6 +1707,14 @@ namespace gbe sel.I64HADD(dst, src0, src1, tmp); break; } + case OP_I64RHADD: + { + GenRegister tmp[4]; + for(int i=0; i<4; i++) + tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); + sel.I64RHADD(dst, src0, src1, tmp); + break; + } case OP_UPSAMPLE_SHORT: sel.UPSAMPLE_SHORT(dst, src0, src1); break; diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx index b411ed2..c4cf652 100644 --- a/backend/src/backend/gen_insn_selection.hxx +++ b/backend/src/backend/gen_insn_selection.hxx @@ -63,6 +63,7 @@ DECL_SELECTION_IR(FBL, UnaryInstruction) DECL_SELECTION_IR(HADD, BinaryWithTempInstruction) DECL_SELECTION_IR(RHADD, BinaryWithTempInstruction) DECL_SELECTION_IR(I64HADD, I64HADDInstruction) +DECL_SELECTION_IR(I64RHADD, I64RHADDInstruction) DECL_SELECTION_IR(UPSAMPLE_SHORT, BinaryInstruction) DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction) DECL_SELECTION_IR(UPSAMPLE_LONG, BinaryInstruction) 
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index b93fc72..ff6af98 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -1349,6 +1349,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType()) DECL_EMIT_FUNCTION(HADD) DECL_EMIT_FUNCTION(RHADD) DECL_EMIT_FUNCTION(I64HADD) + DECL_EMIT_FUNCTION(I64RHADD) #undef DECL_EMIT_FUNCTION diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index fab6d73..2a06f76 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -535,6 +535,8 @@ namespace ir { Instruction RHADD(Type type, Register dst, Register src0, Register src1); /*! i64hadd.type dst src */ Instruction I64HADD(Type type, Register dst, Register src0, Register src1); + /*! i64rhadd.type dst src */ + Instruction I64RHADD(Type type, Register dst, Register src0, Register src1); /*! tan.type dst src */ Instruction RCP(Type type, Register dst, Register src); /*! 
abs.type dst src */ diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index 6af0899..7ead344 100644 --- a/backend/src/ir/instruction.hxx +++ b/backend/src/ir/instruction.hxx @@ -78,6 +78,7 @@ DECL_INSN(FBL, UnaryInstruction) DECL_INSN(HADD, BinaryInstruction) DECL_INSN(RHADD, BinaryInstruction) DECL_INSN(I64HADD, BinaryInstruction) +DECL_INSN(I64RHADD, BinaryInstruction) DECL_INSN(UPSAMPLE_SHORT, BinaryInstruction) DECL_INSN(UPSAMPLE_INT, BinaryInstruction) DECL_INSN(UPSAMPLE_LONG, BinaryInstruction) diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index c98f563..f51f05a 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -1842,6 +1842,7 @@ namespace gbe case GEN_OCL_HADD: case GEN_OCL_RHADD: case GEN_OCL_I64HADD: + case GEN_OCL_I64RHADD: this->newRegister(&I); break; default: @@ -2296,6 +2297,16 @@ namespace gbe ctx.RHADD(getUnsignedType(ctx, I.getType()), dst, src0, src1); break; } + case GEN_OCL_I64RHADD: + { + GBE_ASSERT(AI != AE); + const ir::Register src0 = this->getRegister(*(AI++)); + GBE_ASSERT(AI != AE); + const ir::Register src1 = this->getRegister(*(AI++)); + const ir::Register dst = this->getRegister(&I); + ctx.I64RHADD(ir::TYPE_U64, dst, src0, src1); + break; + } default: break; } } diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 13d8f66..58df2b0 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -132,8 +132,9 @@ DECL_LLVM_GEN_FUNCTION(FBH, __gen_ocl_fbh) DECL_LLVM_GEN_FUNCTION(FBL, __gen_ocl_fbl) DECL_LLVM_GEN_FUNCTION(ABS, __gen_ocl_abs) DECL_LLVM_GEN_FUNCTION(HADD, _Z14__gen_ocl_haddjj) -DECL_LLVM_GEN_FUNCTION(RHADD, __gen_ocl_rhadd) +DECL_LLVM_GEN_FUNCTION(RHADD, _Z15__gen_ocl_rhaddjj) DECL_LLVM_GEN_FUNCTION(I64HADD, _Z14__gen_ocl_haddmm) +DECL_LLVM_GEN_FUNCTION(I64RHADD, _Z15__gen_ocl_rhaddmm) 
DECL_LLVM_GEN_FUNCTION(UPSAMPLE_SHORT, _Z18__gen_ocl_upsampless) DECL_LLVM_GEN_FUNCTION(UPSAMPLE_INT, _Z18__gen_ocl_upsampleii) DECL_LLVM_GEN_FUNCTION(UPSAMPLE_LONG, _Z18__gen_ocl_upsamplell) diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 0f91bf5..4008ad4 100644 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -463,7 +463,7 @@ INLINE_OVERLOADABLE ulong upsample(uint hi, uint lo) { } OVERLOADABLE uint __gen_ocl_hadd(uint x, uint y); -PURE CONST uint __gen_ocl_rhadd(uint x, uint y); +OVERLOADABLE uint __gen_ocl_rhadd(uint x, uint y); #define DEC DEF(char); DEF(uchar); DEF(short); DEF(ushort) #define DEF(type) INLINE_OVERLOADABLE type hadd(type x, type y) { return (x + y) >> 1; } DEC @@ -478,9 +478,14 @@ INLINE_OVERLOADABLE int hadd(int x, int y) { __gen_ocl_hadd((uint)x, (uint)y); } INLINE_OVERLOADABLE uint hadd(uint x, uint y) { return __gen_ocl_hadd(x, y); } -INLINE_OVERLOADABLE int rhadd(int x, int y) { return (x < 0 && y > 0) || (x > 0 && y < 0) ? ((x + y + 1) >> 1) : __gen_ocl_rhadd(x, y); } +INLINE_OVERLOADABLE int rhadd(int x, int y) { + return (x < 0) != (y < 0) ? /* signs differ (0 counts as non-negative): x + y + 1 cannot overflow an int */ + ((x + y + 1) >> 1) : + __gen_ocl_rhadd((uint)x, (uint)y); + } INLINE_OVERLOADABLE uint rhadd(uint x, uint y) { return __gen_ocl_rhadd(x, y); } OVERLOADABLE ulong __gen_ocl_hadd(ulong x, ulong y); +OVERLOADABLE ulong __gen_ocl_rhadd(ulong x, ulong y); INLINE_OVERLOADABLE long hadd(long x, long y) { return (x < 0 && y > 0) || (x > 0 && y < 0) ? ((x + y) >> 1) : @@ -490,10 +495,12 @@ INLINE_OVERLOADABLE ulong hadd(ulong x, ulong y) { return __gen_ocl_hadd(x, y); } INLINE_OVERLOADABLE long rhadd(long x, long y) { - return 0; + return (x < 0) != (y < 0) ? /* signs differ (0 counts as non-negative): x + y + 1 cannot overflow a long */ + ((x + y + 1) >> 1) : + __gen_ocl_rhadd((ulong)x, (ulong)y); } INLINE_OVERLOADABLE ulong rhadd(ulong x, ulong y) { - return 0; + return __gen_ocl_rhadd(x, y); } int __gen_ocl_abs(int x); -- 2.7.4