From 2ce0abf639dda9fc10965ef95ea93e2ad1e87c7c Mon Sep 17 00:00:00 2001 From: Zhigang Gong Date: Thu, 10 Apr 2014 00:05:26 +0800 Subject: [PATCH] GBE: Enable register spilling for SIMD16. Enable register spilling for SIMD16 mode. Introduce an new environment variable OCL_SIMD16_SPILL_THRESHOLD to control the threshold of simd 16 register spilling. Default value is 16, means when the spilled registers are more than 16, beignet will fallback to simd8. Signed-off-by: Zhigang Gong Reviewed-by: "Song, Ruiling" --- backend/src/backend/gen_insn_selection.cpp | 23 ++++++----- backend/src/backend/gen_reg_allocation.cpp | 61 ++++++++++++++++++++++++------ 2 files changed, 64 insertions(+), 20 deletions(-) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index fea0329..820fbad 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -697,6 +697,7 @@ namespace gbe if(insn.opcode == SEL_OP_SPILL_REG || insn.opcode == SEL_OP_UNSPILL_REG) continue; + const int simdWidth = insn.state.execWidth; const uint32_t srcNum = insn.srcNum, dstNum = insn.dstNum; struct RegSlot { @@ -730,9 +731,9 @@ namespace gbe it->second.isTmpReg, it->second.addr); if(family == ir::FAMILY_QWORD) { - poolOffset += 2; + poolOffset += 2 * simdWidth / 8; } else { - poolOffset += 1; + poolOffset += simdWidth / 8; } regSet.push_back(regSlot); } @@ -749,12 +750,13 @@ namespace gbe if (!regSlot.isTmpReg) { /* For temporary registers, we don't need to unspill. */ SelectionInstruction *unspill = this->create(SEL_OP_UNSPILL_REG, 1, 0); - unspill->state = GenInstructionState(ctx.getSimdWidth()); + unspill->state = GenInstructionState(simdWidth); + unspill->state.noMask = 1; unspill->dst(0) = GenRegister(GEN_GENERAL_REGISTER_FILE, registerPool + regSlot.poolOffset, 0, selReg.type, selReg.vstride, selReg.width, selReg.hstride); - unspill->extra.scratchOffset = regSlot.addr; + unspill->extra.scratchOffset = regSlot.addr + selReg.quarter * 4 * simdWidth; unspill->extra.scratchMsgHeader = registerPool; insn.prepend(*unspill); } @@ -789,8 +791,8 @@ namespace gbe struct RegSlot regSlot(reg, dstID, poolOffset, it->second.isTmpReg, it->second.addr); - if(family == ir::FAMILY_QWORD) poolOffset +=2; - else poolOffset += 1; + if(family == ir::FAMILY_QWORD) poolOffset += 2 * simdWidth / 8; + else poolOffset += simdWidth / 8; regSet.push_back(regSlot); } } @@ -806,12 +808,16 @@ namespace gbe if(!regSlot.isTmpReg) { /* For temporary registers, we don't need to unspill. */ SelectionInstruction *spill = this->create(SEL_OP_SPILL_REG, 0, 1); - spill->state = GenInstructionState(ctx.getSimdWidth()); + spill->state = insn.state;//GenInstructionState(simdWidth); + spill->state.accWrEnable = 0; + spill->state.saturate = 0; + if (insn.opcode == SEL_OP_SEL) + spill->state.predicate = GEN_PREDICATE_NONE; spill->src(0) = GenRegister(GEN_GENERAL_REGISTER_FILE, registerPool + regSlot.poolOffset, 0, selReg.type, selReg.vstride, selReg.width, selReg.hstride); - spill->extra.scratchOffset = regSlot.addr; + spill->extra.scratchOffset = regSlot.addr + selReg.quarter * 4 * simdWidth; spill->extra.scratchMsgHeader = registerPool; insn.append(*spill); } @@ -2238,7 +2244,6 @@ namespace gbe } sel.pop(); - // All children are marked as root markAllChildren(dag); return true; diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp index 937f5b2..42bb8a6 100644 --- a/backend/src/backend/gen_reg_allocation.cpp +++ b/backend/src/backend/gen_reg_allocation.cpp @@ -28,6 +28,7 @@ #include "backend/gen_register.hpp" #include "backend/program.hpp" #include "sys/exception.hpp" +#include "sys/cvar.hpp" #include #include #include @@ -594,6 +595,7 @@ namespace gbe } } + IVAR(OCL_SIMD16_SPILL_THRESHOLD, 0, 16, 256); bool GenRegAllocator::Opaque::allocateGRFs(Selection &selection) { // Perform the linear scan allocator const uint32_t regNum = ctx.sel->getRegNum(); @@ -648,8 +650,16 @@ namespace gbe } if (!spilledRegs.empty()) { GBE_ASSERT(reservedReg != 0); + if (ctx.getSimdWidth() == 16) { + if (spilledRegs.size() > (unsigned int)OCL_SIMD16_SPILL_THRESHOLD) { + if (GBE_DEBUG) + std::cerr << "WARN: exceed simd 16 spill threshold (" + << spilledRegs.size() << ">" << OCL_SIMD16_SPILL_THRESHOLD + << ")" << std::endl; + return false; + } + } allocateScratchForSpilled(); - bool success = selection.spillRegs(spilledRegs, reservedReg); if (!success) { std::cerr << "Fail to spill registers." << std::endl; @@ -728,9 +738,14 @@ namespace gbe uint32_t regSize; ir::RegisterFamily family; getRegAttrib(reg, regSize, &family); - - if ((regSize == GEN_REG_SIZE && family == ir::FAMILY_DWORD) - || (regSize == 2*GEN_REG_SIZE && family == ir::FAMILY_QWORD)) { + // At simd16 mode, we may introduce some simd8 registers in te instruction selection stage. + // To spill those simd8 temporary registers will introduce unecessary complexity. We just simply + // avoid to spill those temporary registers here. + if (ctx.getSimdWidth() == 16 && reg.value() >= ctx.getFunction().getRegisterFile().regNum()) + return; + + if ((regSize == ctx.getSimdWidth()/8 * GEN_REG_SIZE && family == ir::FAMILY_DWORD) + || (regSize == 2 * ctx.getSimdWidth()/8 * GEN_REG_SIZE && family == ir::FAMILY_QWORD)) { GBE_ASSERT(offsetReg.find(grfOffset) == offsetReg.end()); offsetReg.insert(std::make_pair(grfOffset, reg)); spillCandidate.insert(intervals[reg]); @@ -747,6 +762,10 @@ namespace gbe bool isAllocated) { if (reservedReg == 0) return false; + + if (interval.reg.value() >= ctx.getFunction().getRegisterFile().regNum() && + ctx.getSimdWidth() == 16) + return false; SpillRegTag spillTag; spillTag.isTmpReg = interval.maxID == interval.minID; spillTag.addr = -1; @@ -762,9 +781,12 @@ namespace gbe return true; } + // Check whethere a vector which is allocated can be spilled out + // If a partial of a vector is expired, the vector will be unspillable, currently. + // FIXME we may need to fix those unspillable vector in the furture. INLINE bool GenRegAllocator::Opaque::vectorCanSpill(SelectionVector *vector) { for(uint32_t id = 0; id < vector->regNum; id++) - if (spillCandidate.find(intervals[(ir::Register)(vector->reg[id]).value.reg]) + if (spillCandidate.find(intervals[(ir::Register)(vector->reg[id].value.reg)]) == spillCandidate.end()) return false; return true; @@ -779,8 +801,12 @@ namespace gbe // If there is no spill candidate or current register is spillable and current register's // endpoint is after all the spillCandidate register's endpoint we return false. The // caller will spill current register. + // At simd16 mode, we will always try to spill here rather than return to the caller. + // The reason is that the caller may have a vector to allocate, and some element may be + // temporary registers which could not be spilled. if (it == spillCandidate.end() - || (it->getMaxID() <= interval.maxID && alignment == GEN_REG_SIZE)) + || (ctx.getSimdWidth() == 8 && (it->getMaxID() <= interval.maxID + && alignment == ctx.getSimdWidth()/8 * GEN_REG_SIZE))) return false; ir::Register reg = it->getReg(); @@ -800,11 +826,13 @@ namespace gbe spillSet.insert(vector->reg[id].reg()); reg = vector->reg[id].reg(); family = ctx.sel->getRegisterFamily(reg); - size -= family == ir::FAMILY_QWORD ? 2*GEN_REG_SIZE : GEN_REG_SIZE; + size -= family == ir::FAMILY_QWORD ? 2 * GEN_REG_SIZE * ctx.getSimdWidth()/8 + : GEN_REG_SIZE * ctx.getSimdWidth()/8; } } else if (!isVector) { spillSet.insert(reg); - size -= family == ir::FAMILY_QWORD ? 2*GEN_REG_SIZE : GEN_REG_SIZE; + size -= family == ir::FAMILY_QWORD ? 2 * GEN_REG_SIZE * ctx.getSimdWidth()/8 + : GEN_REG_SIZE * ctx.getSimdWidth()/8; } else needRestart = true; // is a vector which could not be spilled. @@ -812,7 +840,8 @@ namespace gbe break; if (!needRestart) { uint32_t offset = RA.find(reg)->second; - uint32_t nextOffset = (family == ir::FAMILY_QWORD) ? (offset + 2*GEN_REG_SIZE) : (offset + GEN_REG_SIZE); + uint32_t nextOffset = (family == ir::FAMILY_QWORD) ? (offset + 2 * GEN_REG_SIZE * ctx.getSimdWidth() / 8) + : (offset + GEN_REG_SIZE * ctx.getSimdWidth() / 8); auto nextRegIt = offsetReg.find(nextOffset); if (nextRegIt != offsetReg.end()) reg = nextRegIt->second; @@ -821,9 +850,18 @@ namespace gbe } if (needRestart) { +#if 0 + // FIXME, we should enable this code block in the future. + // If the spill set is not zero and we need a restart, we can + // simply return to try to allocate the registers at first. + // As some vectors which have expired elements may be marked as + // unspillable vector. + if (spillSet.size() > 0) + break; +#endif + it++; // next register is not in spill candidate. // let's move to next candidate and start over. - it++; if (it == spillCandidate.end()) return false; reg = it->getReg(); @@ -857,7 +895,8 @@ namespace gbe reservedReg = ctx.allocate(RESERVED_REG_NUM_FOR_SPILL * GEN_REG_SIZE, GEN_REG_SIZE); reservedReg /= GEN_REG_SIZE; } else { - reservedReg = 0; + reservedReg = ctx.allocate(RESERVED_REG_NUM_FOR_SPILL * GEN_REG_SIZE, GEN_REG_SIZE); + reservedReg /= GEN_REG_SIZE; } // schedulePreRegAllocation(ctx, selection); -- 2.7.4