From: Homer Hsing Date: Tue, 13 Aug 2013 03:05:28 +0000 (+0800) Subject: support 64bit-integer multiplication X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=628baebaf0dd22a0fa949760f5553a1baa1fb43e;p=contrib%2Fbeignet.git support 64bit-integer multiplication also add test case Signed-off-by: Homer Hsing Reviewed-by: Zhigang Gong --- diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index c8066e0..4246423 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -598,6 +598,52 @@ namespace gbe p->pop(); } + void GenContext::I32FullMult(GenRegister high, GenRegister low, GenRegister src0, GenRegister src1) { + GenRegister acc = GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD); + int execWidth = p->curr.execWidth; + p->push(); + p->curr.execWidth = 8; + for(int i = 0; i < execWidth; i += 8) { + p->MUL(acc, src0, src1); + p->curr.accWrEnable = 1; + p->MACH(high, src0, src1); + p->curr.accWrEnable = 0; + p->MOV(low, acc); + src0 = GenRegister::suboffset(src0, 8); + src1 = GenRegister::suboffset(src1, 8); + high = GenRegister::suboffset(high, 8); + low = GenRegister::suboffset(low, 8); + } + p->pop(); + } + + void GenContext::emitI64MULInstruction(const SelectionInstruction &insn) { + GenRegister dest = ra->genReg(insn.dst(0)); + GenRegister x = ra->genReg(insn.src(0)); + GenRegister y = ra->genReg(insn.src(1)); + GenRegister a = ra->genReg(insn.dst(1)); + GenRegister b = ra->genReg(insn.dst(2)); + GenRegister c = ra->genReg(insn.dst(3)); + GenRegister d = ra->genReg(insn.dst(4)); + GenRegister e = ra->genReg(insn.dst(5)); + GenRegister f = ra->genReg(insn.dst(6)); + a.type = b.type = c.type = d.type = e.type = f.type = GEN_TYPE_UD; + loadTopHalf(a, x); + loadBottomHalf(b, x); + loadTopHalf(c, y); + loadBottomHalf(d, y); + p->push(); + p->curr.predicate = GEN_PREDICATE_NONE; + I32FullMult(GenRegister::null(), e, b, c); + I32FullMult(GenRegister::null(), f, a, d); + p->ADD(e, e, f); + I32FullMult(f, a, b, d); + p->ADD(e, e, f); + p->pop(); + storeTopHalf(dest, e); + storeBottomHalf(dest, a); + } + void GenContext::emitTernaryInstruction(const SelectionInstruction &insn) { const GenRegister dst = ra->genReg(insn.dst(0)); const GenRegister src0 = ra->genReg(insn.src(0)); diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp index 150e5ff..9e7b384 100644 --- a/backend/src/backend/gen_context.hpp +++ b/backend/src/backend/gen_context.hpp @@ -85,6 +85,7 @@ namespace gbe void addWithCarry(GenRegister dest, GenRegister src0, GenRegister src1); void subWithBorrow(GenRegister dest, GenRegister src0, GenRegister src1); + void I32FullMult(GenRegister high, GenRegister low, GenRegister src0, GenRegister src1); /*! Final Gen ISA emission helper functions */ void emitLabelInstruction(const SelectionInstruction &insn); @@ -115,8 +116,10 @@ namespace gbe void emitSpillRegInstruction(const SelectionInstruction &insn); void emitUnSpillRegInstruction(const SelectionInstruction &insn); void emitGetImageInfoInstruction(const SelectionInstruction &insn); + void emitI64MULInstruction(const SelectionInstruction &insn); void scratchWrite(const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode); void scratchRead(const GenRegister dst, const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode); + /*! Implements base class */ virtual Kernel *allocateKernel(void); /*! Store the position of each label instruction in the Gen ISA stream */ diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx index 4879b66..7f214ac 100644 --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx @@ -27,3 +27,4 @@ DECL_GEN7_SCHEDULE(SpillReg, 80, 1, 1) DECL_GEN7_SCHEDULE(UnSpillReg, 80, 1, 1) DECL_GEN7_SCHEDULE(GetImageInfo, 20, 4, 2) DECL_GEN7_SCHEDULE(Atomic, 80, 1, 1) +DECL_GEN7_SCHEDULE(I64MUL, 20, 4, 2) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 1e72937..38f56b5 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -523,6 +523,8 @@ namespace gbe void TYPED_WRITE(GenRegister *src, uint32_t srcNum, GenRegister *msgs, uint32_t msgNum, uint32_t bti); /*! Get image information */ void GET_IMAGE_INFO(uint32_t type, GenRegister *dst, uint32_t dst_num, uint32_t bti); + /*! Multiply 64-bit integers */ + void I64MUL(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]); /*! Use custom allocators */ GBE_CLASS(Opaque); friend class SelectionBlock; @@ -1003,6 +1005,15 @@ namespace gbe insn->extra.function = function; } + void Selection::Opaque::I64MUL(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) { + SelectionInstruction *insn = this->appendInsn(SEL_OP_I64MUL, 7, 2); + insn->dst(0) = dst; + insn->src(0) = src0; + insn->src(1) = src1; + for(int i = 0; i < 6; i++) + insn->dst(i + 1) = tmp[i]; + } + void Selection::Opaque::ALU1(SelectionOpcode opcode, Reg dst, Reg src) { SelectionInstruction *insn = this->appendInsn(opcode, 1, 1); insn->dst(0) = dst; @@ -1610,12 +1621,14 @@ namespace gbe if (type == TYPE_U32 || type == TYPE_S32) { sel.pop(); return false; - } - else { - GBE_ASSERTM((type != TYPE_S64 && type != TYPE_U64), "64bit integer not supported yet!" ); + } else if (type == TYPE_S64 || type == TYPE_U64) { + GenRegister tmp[6]; + for(int i = 0; i < 6; i++) + tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); + sel.I64MUL(dst, src0, src1, tmp); + } else sel.MUL(dst, src0, src1); - } - break; + break; case OP_HADD: { GenRegister temp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), GEN_TYPE_D); sel.HADD(dst, src0, src1, temp); diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx index 06469ca..6ef50b8 100644 --- a/backend/src/backend/gen_insn_selection.hxx +++ b/backend/src/backend/gen_insn_selection.hxx @@ -30,6 +30,7 @@ DECL_SELECTION_IR(ADD, BinaryInstruction) DECL_SELECTION_IR(I64ADD, BinaryWithTempInstruction) DECL_SELECTION_IR(I64SUB, BinaryWithTempInstruction) DECL_SELECTION_IR(MUL, BinaryInstruction) +DECL_SELECTION_IR(I64MUL, I64MULInstruction) DECL_SELECTION_IR(ATOMIC, AtomicInstruction) DECL_SELECTION_IR(MACH, BinaryInstruction) DECL_SELECTION_IR(CMP, CompareInstruction) diff --git a/kernels/compiler_long_mult.cl b/kernels/compiler_long_mult.cl new file mode 100644 index 0000000..5b96d74 --- /dev/null +++ b/kernels/compiler_long_mult.cl @@ -0,0 +1,7 @@ +kernel void compiler_long_mult(global long *src1, global long *src2, global long *dst) { + int i = get_global_id(0); + if(i < 3) + dst[i] = src1[i] + src2[i]; + else + dst[i] = src1[i] * src2[i]; +} diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index fe1f6fe..12ebe18 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -130,6 +130,7 @@ set (utests_sources compiler_long_shl.cpp compiler_long_shr.cpp compiler_long_asr.cpp + compiler_long_mult.cpp utest_assert.cpp utest.cpp utest_file_map.cpp diff --git a/utests/compiler_long_mult.cpp b/utests/compiler_long_mult.cpp new file mode 100644 index 0000000..06070f7 --- /dev/null +++ b/utests/compiler_long_mult.cpp @@ -0,0 +1,49 @@ +#include +#include +#include +#include "utest_helper.hpp" + +void compiler_long_mult(void) +{ + const size_t n = 16; + int64_t src1[n], src2[n]; + + // Setup kernel and buffers + OCL_CREATE_KERNEL("compiler_long_mult"); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int64_t), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int64_t), NULL); + OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int64_t), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); + globals[0] = n; + locals[0] = 16; + + // Run random tests + for (int32_t i = 0; i < (int32_t) n; ++i) { + src1[i] = 0x77665544FFEEDDCCLL; + src2[i] = ((int64_t)rand() << 32) + rand(); + } + OCL_MAP_BUFFER(0); + OCL_MAP_BUFFER(1); + memcpy(buf_data[0], src1, sizeof(src1)); + memcpy(buf_data[1], src2, sizeof(src2)); + OCL_UNMAP_BUFFER(0); + OCL_UNMAP_BUFFER(1); + + // Run the kernel on GPU + OCL_NDRANGE(1); + + // Compare + OCL_MAP_BUFFER(2); + for (int32_t i = 0; i < (int32_t) n; ++i) { + //printf("%lx\n", ((int64_t *)buf_data[2])[i]); + if (i < 3) + OCL_ASSERT(src1[i] + src2[i] == ((int64_t *)buf_data[2])[i]); + else + OCL_ASSERT(src1[i] * src2[i] == ((int64_t *)buf_data[2])[i]); + } + OCL_UNMAP_BUFFER(2); +} + +MAKE_UTEST_FROM_FUNCTION(compiler_long_mult);