From 552eae5f4c5d063038244ac3d11e2a0fc427e849 Mon Sep 17 00:00:00 2001 From: Zhigang Gong Date: Thu, 8 Aug 2013 15:15:43 +0800 Subject: [PATCH] GBE: fix insntruction scheduling related bugs in read64/write64. In read64 and write64, we allocate some temporary registers and we should put all of those temporary registers may be modified to the instruction's dst array. Otherwise, the latter post instruction scheduling may rearrange the instruction incorrectly. Signed-off-by: Zhigang Gong Reviewed-by: "Song, Ruiling" --- backend/src/backend/gen_context.cpp | 13 +++---- backend/src/backend/gen_encoder.cpp | 6 ---- backend/src/backend/gen_insn_selection.cpp | 57 +++++++++++++++++------------- 3 files changed, 39 insertions(+), 37 deletions(-) diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 621e7be..6de6cf1 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -601,10 +601,10 @@ namespace gbe void GenContext::emitRead64Instruction(const SelectionInstruction &insn) { const uint32_t elemNum = insn.extra.elem; const uint32_t tmpRegSize = (p->curr.execWidth == 8) ? elemNum * 2 : elemNum; - const GenRegister dst = ra->genReg(insn.dst(tmpRegSize)); - const GenRegister tmp = ra->genReg(insn.dst(0)); + const GenRegister tempAddr = ra->genReg(insn.dst(0)); + const GenRegister dst = ra->genReg(insn.dst(tmpRegSize + 1)); + const GenRegister tmp = ra->genReg(insn.dst(1)); const GenRegister src = ra->genReg(insn.src(0)); - const GenRegister tempAddr = ra->genReg(insn.src(1)); const uint32_t bti = insn.extra.function; p->READ64(dst, tmp, tempAddr, src, bti, elemNum); } @@ -621,11 +621,12 @@ namespace gbe // then follow the real destination registers. // For SIMD16, we allocate elemNum temporary registers from dst(0). void GenContext::emitWrite64Instruction(const SelectionInstruction &insn) { - const GenRegister src = ra->genReg(insn.src(0)); + const GenRegister src = ra->genReg(insn.dst(0)); const uint32_t elemNum = insn.extra.elem; - const uint32_t tmpRegSize = (p->curr.execWidth == 8) ? elemNum * 2 : elemNum; - const GenRegister data = ra->genReg(insn.src(tmpRegSize + 1)); + const GenRegister addr = ra->genReg(insn.src(0)); //tmpRegSize + 1)); + const GenRegister data = ra->genReg(insn.src(1)); const uint32_t bti = insn.extra.function; + p->MOV(src, addr); p->WRITE64(src, data, bti, elemNum); } diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp index 4d6aa34..1a459e1 100644 --- a/backend/src/backend/gen_encoder.cpp +++ b/backend/src/backend/gen_encoder.cpp @@ -412,12 +412,6 @@ namespace gbe curr.noMask = originMask; this->UNTYPED_WRITE(msg, bti, elemNum); } - /* Let's restore the original message(addr) register. */ - /* XXX could be optimized if we don't allocate the address to the header - position of the message. */ - curr.predicate = GEN_PREDICATE_NONE; - curr.noMask = GEN_MASK_DISABLE; - ADD(msg, GenRegister::retype(msg, GEN_TYPE_UD), GenRegister::immd(-4)); pop(); } diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 1a3af68..a361ab3 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -479,7 +479,7 @@ namespace gbe /*! Read 64 bits float/int array */ void READ64(Reg addr, Reg tempAddr, const GenRegister *dst, uint32_t elemNum, uint32_t valueNum, uint32_t bti); /*! Write 64 bits float/int array */ - void WRITE64(Reg addr, const GenRegister *src, uint32_t elemNum, uint32_t valueNum, uint32_t bti); + void WRITE64(Reg addr, const GenRegister *src, uint32_t srcNum, const GenRegister *dst, uint32_t dstNum, uint32_t bti); /*! Untyped read (up to 4 elements) */ void UNTYPED_READ(Reg addr, const GenRegister *dst, uint32_t elemNum, uint32_t bti); /*! Untyped write (up to 4 elements) */ @@ -825,28 +825,29 @@ namespace gbe /* elemNum contains all the temporary register and the real destination registers.*/ void Selection::Opaque::READ64(Reg addr, - Reg tempAddr, - const GenRegister *dst, - uint32_t elemNum, - uint32_t valueNum, - uint32_t bti) + Reg tempAddr, + const GenRegister *dst, + uint32_t elemNum, + uint32_t valueNum, + uint32_t bti) { - SelectionInstruction *insn = this->appendInsn(SEL_OP_READ64, elemNum, 2); + SelectionInstruction *insn = this->appendInsn(SEL_OP_READ64, elemNum + 1, 1); SelectionVector *srcVector = this->appendVector(); SelectionVector *dstVector = this->appendVector(); + /* temporary addr register is to be modified, set it to dst registers.*/ + insn->dst(0) = tempAddr; // Regular instruction to encode for (uint32_t elemID = 0; elemID < elemNum; ++elemID) - insn->dst(elemID) = dst[elemID]; + insn->dst(elemID + 1) = dst[elemID]; insn->src(0) = addr; - insn->src(1) = tempAddr; insn->extra.function = bti; insn->extra.elem = valueNum; // Only the temporary registers need contiguous allocation dstVector->regNum = elemNum - valueNum; dstVector->isSrc = 0; - dstVector->reg = &insn->dst(0); + dstVector->reg = &insn->dst(1); // Source cannot be scalar (yet) srcVector->regNum = 1; @@ -883,24 +884,27 @@ namespace gbe /* elemNum contains all the temporary register and the real data registers.*/ void Selection::Opaque::WRITE64(Reg addr, - const GenRegister *src, - uint32_t elemNum, - uint32_t valueNum, - uint32_t bti) + const GenRegister *src, + uint32_t srcNum, + const GenRegister *dst, + uint32_t dstNum, + uint32_t bti) { - SelectionInstruction *insn = this->appendInsn(SEL_OP_WRITE64, 0, elemNum+1); + SelectionInstruction *insn = this->appendInsn(SEL_OP_WRITE64, dstNum, srcNum + 1); SelectionVector *vector = this->appendVector(); // Regular instruction to encode insn->src(0) = addr; - for (uint32_t elemID = 0; elemID < elemNum; ++elemID) - insn->src(elemID+1) = src[elemID]; + for (uint32_t elemID = 0; elemID < srcNum; ++elemID) + insn->src(elemID + 1) = src[elemID]; + for (uint32_t elemID = 0; elemID < dstNum; ++elemID) + insn->dst(elemID) = dst[elemID]; insn->extra.function = bti; - insn->extra.elem = valueNum; + insn->extra.elem = srcNum; // Only the addr + temporary registers need to be contiguous. - vector->regNum = (elemNum - valueNum) + 1; - vector->reg = &insn->src(0); + vector->regNum = dstNum; + vector->reg = &insn->dst(0); vector->isSrc = 1; } @@ -2085,13 +2089,16 @@ namespace gbe addr = GenRegister::retype(sel.selReg(insn.getSrc(addrID)), GEN_TYPE_F); // The first 16 DWORD register space is for temporary usage at encode stage. uint32_t tmpRegNum = (sel.ctx.getSimdWidth() == 8) ? valueNum * 2 : valueNum; - GenRegister src[valueNum + tmpRegNum]; + GenRegister src[valueNum]; + GenRegister dst[tmpRegNum + 1]; + /* dst 0 is for the temporary address register. */ + dst[0] = sel.selReg(sel.reg(FAMILY_DWORD)); for (srcID = 0; srcID < tmpRegNum; ++srcID) - src[srcID] = sel.selReg(sel.reg(FAMILY_DWORD)); + dst[srcID + 1] = sel.selReg(sel.reg(FAMILY_DWORD)); - for (uint32_t valueID = 0; valueID < valueNum; ++srcID, ++valueID) - src[srcID] = sel.selReg(insn.getValue(valueID)); - sel.WRITE64(addr, src, valueNum + tmpRegNum, valueNum, bti); + for (uint32_t valueID = 0; valueID < valueNum; ++valueID) + src[valueID] = sel.selReg(insn.getValue(valueID)); + sel.WRITE64(addr, src, valueNum, dst, tmpRegNum + 1, bti); } void emitByteScatter(Selection::Opaque &sel, -- 2.7.4