From 3ae20b67ab88bd595a6dca9814dd642c5d056d9f Mon Sep 17 00:00:00 2001 From: Homer Hsing Date: Fri, 13 Sep 2013 09:41:02 +0800 Subject: [PATCH] support converting 64-bit integer to 32-bit float version 2: improve algorithm to convert signed integer fix source operand type in llvm_gen_backend enable predicate in addWithCarry change test case to test signed integer Signed-off-by: Homer Hsing Reviewed-by: "Yang, Rong R" --- backend/src/backend/gen_context.cpp | 45 +++++++++++++++++++++- backend/src/backend/gen_context.hpp | 2 + .../src/backend/gen_insn_gen7_schedule_info.hxx | 1 + backend/src/backend/gen_insn_selection.cpp | 17 ++++++++ backend/src/backend/gen_insn_selection.hxx | 1 + backend/src/llvm/llvm_gen_backend.cpp | 2 +- kernels/compiler_long_convert.cl | 5 +++ utests/compiler_long_convert.cpp | 40 +++++++++++++++++++ 8 files changed, 111 insertions(+), 2 deletions(-) diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 0d584df..a1df963 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -578,6 +578,49 @@ namespace gbe p->pop(); } + void GenContext::UnsignedI64ToFloat(GenRegister dst, GenRegister high, GenRegister low, GenRegister tmp) { + p->MOV(dst, high); + p->MUL(dst, dst, GenRegister::immf(65536.f * 65536.f)); + tmp.type = GEN_TYPE_F; + p->MOV(tmp, low); + p->ADD(dst, dst, tmp); + } + + void GenContext::emitI64ToFloatInstruction(const SelectionInstruction &insn) { + GenRegister src = ra->genReg(insn.src(0)); + GenRegister dest = ra->genReg(insn.dst(0)); + GenRegister high = ra->genReg(insn.dst(1)); + GenRegister low = ra->genReg(insn.dst(2)); + GenRegister tmp = ra->genReg(insn.dst(3)); + loadTopHalf(high, src); + loadBottomHalf(low, src); + if(!src.is_signed_int()) { + UnsignedI64ToFloat(dest, high, low, tmp); + } else { + p->push(); + p->curr.predicate = GEN_PREDICATE_NONE; + p->curr.physicalFlag = 1; + p->curr.flag = 1; + p->curr.subFlag = 0; + p->CMP(GEN_CONDITIONAL_GE, high, GenRegister::immud(0x80000000)); + p->curr.predicate = GEN_PREDICATE_NORMAL; + p->NOT(high, high); + p->NOT(low, low); + p->MOV(tmp, GenRegister::immud(1)); + addWithCarry(low, low, tmp); + p->ADD(high, high, tmp); + p->pop(); + UnsignedI64ToFloat(dest, high, low, tmp); + p->push(); + p->curr.physicalFlag = 1; + p->curr.flag = 1; + p->curr.subFlag = 0; + dest.type = GEN_TYPE_UD; + p->OR(dest, dest, GenRegister::immud(0x80000000)); + p->pop(); + } + } + void GenContext::emitI64CompareInstruction(const SelectionInstruction &insn) { GenRegister src0 = ra->genReg(insn.src(0)); GenRegister src1 = ra->genReg(insn.src(1)); @@ -728,11 +771,11 @@ namespace gbe int execWidth = p->curr.execWidth; GenRegister acc0 = GenRegister::retype(GenRegister::acc(), GEN_TYPE_D); p->push(); - p->curr.predicate = GEN_PREDICATE_NONE; p->curr.execWidth = 8; p->ADDC(dest, src0, src1); p->MOV(src1, acc0); if (execWidth == 16) { + p->curr.quarterControl = 1; p->ADDC(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src0, 8), GenRegister::suboffset(src1, 8)); diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp index 4601242..6b37276 100644 --- a/backend/src/backend/gen_context.hpp +++ b/backend/src/backend/gen_context.hpp @@ -88,6 +88,7 @@ namespace gbe void subWithBorrow(GenRegister dest, GenRegister src0, GenRegister src1); void I32FullMult(GenRegister high, GenRegister low, GenRegister src0, GenRegister src1); void saveFlag(GenRegister dest, int flag, int subFlag); + void UnsignedI64ToFloat(GenRegister dst, GenRegister high, GenRegister low, GenRegister tmp); /*! Final Gen ISA emission helper functions */ void emitLabelInstruction(const SelectionInstruction &insn); @@ -99,6 +100,7 @@ namespace gbe void emitI64HADDInstruction(const SelectionInstruction &insn); void emitI64ShiftInstruction(const SelectionInstruction &insn); void emitI64CompareInstruction(const SelectionInstruction &insn); + void emitI64ToFloatInstruction(const SelectionInstruction &insn); void emitCompareInstruction(const SelectionInstruction &insn); void emitJumpInstruction(const SelectionInstruction &insn); void emitIndirectMoveInstruction(const SelectionInstruction &insn); diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx index 445b461..49b3170 100644 --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx @@ -7,6 +7,7 @@ DECL_GEN7_SCHEDULE(BinaryWithTemp, 20, 4, 2) DECL_GEN7_SCHEDULE(Ternary, 20, 4, 2) DECL_GEN7_SCHEDULE(I64Shift, 20, 4, 2) DECL_GEN7_SCHEDULE(I64HADD, 20, 4, 2) +DECL_GEN7_SCHEDULE(I64ToFloat, 20, 4, 2) DECL_GEN7_SCHEDULE(Compare, 20, 4, 2) DECL_GEN7_SCHEDULE(I64Compare, 20, 4, 2) DECL_GEN7_SCHEDULE(Jump, 14, 1, 1) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 1bb1f46..241164b 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -469,6 +469,8 @@ namespace gbe #undef ALU2WithTemp #undef ALU3 #undef I64Shift + /*! Convert 64-bit integer to 32-bit float */ + void CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[3]); /*! (x+y)>>1 without mod. overflow */ void I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]); /*! Shift a 64-bit integer */ @@ -1075,6 +1077,14 @@ namespace gbe insn->extra.function = conditional; } + void Selection::Opaque::CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[3]) { + SelectionInstruction *insn = this->appendInsn(SEL_OP_CONVI64_TO_F, 4, 1); + insn->dst(0) = dst; + insn->src(0) = src; + for(int i = 0; i < 3; i ++) + insn->dst(i + 1) = tmp[i]; + } + void Selection::Opaque::I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]) { SelectionInstruction *insn = this->appendInsn(SEL_OP_I64HADD, 5, 2); insn->dst(0) = dst; @@ -2421,6 +2431,13 @@ namespace gbe sel.MOV(dst, unpacked); } else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) && srcFamily == FAMILY_QWORD) { sel.CONVI64_TO_I(dst, src); + } else if (dstType == ir::TYPE_FLOAT && srcFamily == FAMILY_QWORD) { + GenRegister tmp[3]; + for(int i=0; i<3; i++) { + tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); + tmp[i].type = GEN_TYPE_UD; + } + sel.CONVI64_TO_F(dst, src, tmp); } else if (dst.isdf()) { ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD); sel.MOV_DF(dst, src, sel.selReg(r)); diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx index d3f21d6..b411ed2 100644 --- a/backend/src/backend/gen_insn_selection.hxx +++ b/backend/src/backend/gen_insn_selection.hxx @@ -68,3 +68,4 @@ DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction) DECL_SELECTION_IR(UPSAMPLE_LONG, BinaryInstruction) DECL_SELECTION_IR(CONVI_TO_I64, UnaryWithTempInstruction) DECL_SELECTION_IR(CONVI64_TO_I, UnaryInstruction) +DECL_SELECTION_IR(CONVI64_TO_F, I64ToFloatInstruction) diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 3c04565..c98f563 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -1516,7 +1516,7 @@ namespace gbe Type *llvmSrcType = I.getOperand(0)->getType(); const ir::Type dstType = getType(ctx, llvmDstType); ir::Type srcType; - if (I.getOpcode() == Instruction::ZExt) { + if (I.getOpcode() == Instruction::ZExt || I.getOpcode() == Instruction::UIToFP) { srcType = getUnsignedType(ctx, llvmSrcType); } else { srcType = getType(ctx, llvmSrcType); diff --git a/kernels/compiler_long_convert.cl b/kernels/compiler_long_convert.cl index 03df147..e5f7939 100644 --- a/kernels/compiler_long_convert.cl +++ b/kernels/compiler_long_convert.cl @@ -12,3 +12,8 @@ kernel void compiler_long_convert_2(global char *dst1, global short *dst2, globa dst2[i] = src[i]; dst3[i] = src[i]; } + +kernel void compiler_long_convert_to_float(global float *dst, global long *src) { + int i = get_global_id(0); + dst[i] = src[i]; +} diff --git a/utests/compiler_long_convert.cpp b/utests/compiler_long_convert.cpp index fe976be..827a45b 100644 --- a/utests/compiler_long_convert.cpp +++ b/utests/compiler_long_convert.cpp @@ -116,3 +116,43 @@ void compiler_long_convert_2(void) } MAKE_UTEST_FROM_FUNCTION(compiler_long_convert_2); + +// convert 64-bit integer to 32-bit float +void compiler_long_convert_to_float(void) +{ + const size_t n = 16; + int64_t src[n]; + + // Setup kernel and buffers + OCL_CREATE_KERNEL_FROM_FILE("compiler_long_convert", "compiler_long_convert_to_float"); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int64_t), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + globals[0] = n; + locals[0] = 16; + + // Run random tests + for (int32_t i = 0; i < (int32_t) n; ++i) { + src[i] = -(int64_t)i; + } + OCL_MAP_BUFFER(1); + memcpy(buf_data[1], src, sizeof(src)); + OCL_UNMAP_BUFFER(1); + + // Run the kernel on GPU + OCL_NDRANGE(1); + + // Compare + OCL_MAP_BUFFER(0); + OCL_MAP_BUFFER(1); + float *dst = ((float *)buf_data[0]); + for (int32_t i = 0; i < (int32_t) n; ++i) { + //printf("%f\n", dst[i]); + OCL_ASSERT(dst[i] == src[i]); + } + OCL_UNMAP_BUFFER(0); + OCL_UNMAP_BUFFER(1); +} + +MAKE_UTEST_FROM_FUNCTION(compiler_long_convert_to_float); -- 2.7.4