From 99d079552ecfca2611b94a1fe51da181b2dd7d9d Mon Sep 17 00:00:00 2001 From: Zhigang Gong Date: Wed, 21 May 2014 16:27:55 +0800 Subject: [PATCH] GBE: fix post scheduling related bug for spill/unspill. spill/unspill instruction touch some registers directly which are not in dst/src. This breaks the post scheduling. Simply work around it by add all the reserved registers to the dst array. The scratch memory is not correctly indexed and the barrier is not handled properly. After this patch, the post scheduling will be enabled by default. Signed-off-by: Zhigang Gong Reviewed-by: "Yang, Rong R" --- backend/src/backend/gen_insn_scheduling.cpp | 16 +++++++++++++--- backend/src/backend/gen_insn_selection.cpp | 20 +++++++++++++++++--- backend/src/backend/gen_reg_allocation.cpp | 2 +- 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/backend/src/backend/gen_insn_scheduling.cpp b/backend/src/backend/gen_insn_scheduling.cpp index e41b84e..6f37ed4 100644 --- a/backend/src/backend/gen_insn_scheduling.cpp +++ b/backend/src/backend/gen_insn_scheduling.cpp @@ -138,6 +138,7 @@ namespace gbe enum GenMemory : uint8_t { GLOBAL_MEMORY = 0, LOCAL_MEMORY, + SCRATCH_MEMORY, MAX_MEM_SYSTEM }; @@ -348,7 +349,7 @@ namespace gbe uint32_t DependencyTracker::getIndex(uint32_t bti) const { const uint32_t memDelta = grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER; - return bti == 0xfe ? memDelta + LOCAL_MEMORY : memDelta + GLOBAL_MEMORY; + return bti == 0xfe ? memDelta + LOCAL_MEMORY : (bti == 0xff ? memDelta + SCRATCH_MEMORY : memDelta + GLOBAL_MEMORY); } void DependencyTracker::updateWrites(ScheduleDAGNode *node) { @@ -383,6 +384,7 @@ namespace gbe this->nodes[index] = node; } + // Track writes in scratch memory if(insn.opcode == SEL_OP_SPILL_REG) { const uint32_t index = this->getIndex(0xff); this->nodes[index] = node; @@ -548,6 +550,12 @@ namespace gbe tracker.addDependency(index, node, WRITE_AFTER_READ); } + // write-after-read in scratch memory + if (insn.opcode == SEL_OP_UNSPILL_REG) { + const uint32_t index = tracker.getIndex(0xff); + tracker.addDependency(index, node, WRITE_AFTER_READ); + } + // Consider barriers and wait are reading memory (local and global) if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_FENCE || @@ -573,7 +581,9 @@ namespace gbe // Make labels and branches non-schedulable (i.e. they act as barriers) for (int32_t insnID = 0; insnID < insnNum; ++insnID) { ScheduleDAGNode *node = tracker.insnNodes[insnID]; - if (node->insn.isBranch() || node->insn.isLabel() || node->insn.opcode == SEL_OP_EOT || node->insn.opcode == SEL_OP_IF) + if (node->insn.isBranch() || node->insn.isLabel() + || node->insn.opcode == SEL_OP_EOT || node->insn.opcode == SEL_OP_IF + || node->insn.opcode == SEL_OP_BARRIER) tracker.makeBarrier(insnID, insnNum); } @@ -681,7 +691,7 @@ namespace gbe } } - BVAR(OCL_POST_ALLOC_INSN_SCHEDULE, false); + BVAR(OCL_POST_ALLOC_INSN_SCHEDULE, true); BVAR(OCL_PRE_ALLOC_INSN_SCHEDULE, false); void schedulePostRegAllocation(GenContext &ctx, Selection &selection) { diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 3210b3c..13412f7 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -778,19 +778,28 @@ namespace gbe << (uint32_t)poolOffset << std::endl; return false; } + // FIXME, to support post register allocation scheduling, + // put all the reserved register to the spill/unspill's destination registers. + // This is not the best way. We need to refine the spill/unspill instruction to + // only use passed in registers and don't access hard coded offset in the future. while(!regSet.empty()) { struct RegSlot regSlot = regSet.back(); regSet.pop_back(); const GenRegister selReg = insn.src(regSlot.srcID); if (!regSlot.isTmpReg) { /* For temporary registers, we don't need to unspill. */ - SelectionInstruction *unspill = this->create(SEL_OP_UNSPILL_REG, 1, 0); + SelectionInstruction *unspill = this->create(SEL_OP_UNSPILL_REG, + 1 + (ctx.reservedSpillRegs * 8) / ctx.getSimdWidth(), 0); unspill->state = GenInstructionState(simdWidth); unspill->state.noMask = 1; unspill->dst(0) = GenRegister(GEN_GENERAL_REGISTER_FILE, registerPool + regSlot.poolOffset, 0, selReg.type, selReg.vstride, selReg.width, selReg.hstride); + for(uint32_t i = 1; i < 1 + (ctx.reservedSpillRegs * 8) / ctx.getSimdWidth(); i++) + unspill->dst(i) = ctx.getSimdWidth() == 8 ? + GenRegister::vec8(GEN_GENERAL_REGISTER_FILE, registerPool + (i - 1), 0 ) : + GenRegister::vec16(GEN_GENERAL_REGISTER_FILE, registerPool + (i - 1) * 2, 0); unspill->extra.scratchOffset = regSlot.addr + selReg.quarter * 4 * simdWidth; unspill->extra.scratchMsgHeader = registerPool; insn.prepend(*unspill); @@ -826,7 +835,7 @@ namespace gbe struct RegSlot regSlot(reg, dstID, poolOffset, it->second.isTmpReg, it->second.addr); - if(family == ir::FAMILY_QWORD) poolOffset += 2 * simdWidth / 8; + if (family == ir::FAMILY_QWORD) poolOffset += 2 * simdWidth / 8; else poolOffset += simdWidth / 8; regSet.push_back(regSlot); } @@ -845,7 +854,8 @@ namespace gbe const GenRegister selReg = insn.dst(regSlot.dstID); if(!regSlot.isTmpReg) { /* For temporary registers, we don't need to unspill. */ - SelectionInstruction *spill = this->create(SEL_OP_SPILL_REG, 0, 1); + SelectionInstruction *spill = this->create(SEL_OP_SPILL_REG, + (ctx.reservedSpillRegs * 8) / ctx.getSimdWidth() , 1); spill->state = insn.state;//GenInstructionState(simdWidth); spill->state.accWrEnable = 0; spill->state.saturate = 0; @@ -857,6 +867,10 @@ namespace gbe selReg.width, selReg.hstride); spill->extra.scratchOffset = regSlot.addr + selReg.quarter * 4 * simdWidth; spill->extra.scratchMsgHeader = registerPool; + for(uint32_t i = 0; i < 0 + (ctx.reservedSpillRegs * 8) / ctx.getSimdWidth(); i++) + spill->dst(i) = ctx.getSimdWidth() == 8 ? + GenRegister::vec8(GEN_GENERAL_REGISTER_FILE, registerPool + (i), 0 ) : + GenRegister::vec16(GEN_GENERAL_REGISTER_FILE, registerPool + (i) * 2, 0); insn.append(*spill); } diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp index b23ab92..f642c2e 100644 --- a/backend/src/backend/gen_reg_allocation.cpp +++ b/backend/src/backend/gen_reg_allocation.cpp @@ -736,7 +736,7 @@ namespace gbe for(uint32_t i = 0; i < regNum; i++) { const GenRegInterval * cur = starting[i]; const GenRegInterval * exp = ending[toExpire]; - if(exp->maxID < cur->minID) { + if (exp->maxID < cur->minID) { auto it = spilledRegs.find(exp->reg); GBE_ASSERT(it != spilledRegs.end()); if(it->second.addr != -1) { -- 2.7.4