if(insn.opcode == SEL_OP_SPILL_REG
|| insn.opcode == SEL_OP_UNSPILL_REG)
continue;
+ const int simdWidth = insn.state.execWidth;
const uint32_t srcNum = insn.srcNum, dstNum = insn.dstNum;
struct RegSlot {
it->second.isTmpReg,
it->second.addr);
if(family == ir::FAMILY_QWORD) {
- poolOffset += 2;
+ poolOffset += 2 * simdWidth / 8;
} else {
- poolOffset += 1;
+ poolOffset += simdWidth / 8;
}
regSet.push_back(regSlot);
}
if (!regSlot.isTmpReg) {
/* For temporary registers, we don't need to unspill. */
SelectionInstruction *unspill = this->create(SEL_OP_UNSPILL_REG, 1, 0);
- unspill->state = GenInstructionState(ctx.getSimdWidth());
+ unspill->state = GenInstructionState(simdWidth);
+ unspill->state.noMask = 1;
unspill->dst(0) = GenRegister(GEN_GENERAL_REGISTER_FILE,
registerPool + regSlot.poolOffset, 0,
selReg.type, selReg.vstride,
selReg.width, selReg.hstride);
- unspill->extra.scratchOffset = regSlot.addr;
+ unspill->extra.scratchOffset = regSlot.addr + selReg.quarter * 4 * simdWidth;
unspill->extra.scratchMsgHeader = registerPool;
insn.prepend(*unspill);
}
struct RegSlot regSlot(reg, dstID, poolOffset,
it->second.isTmpReg,
it->second.addr);
- if(family == ir::FAMILY_QWORD) poolOffset +=2;
- else poolOffset += 1;
+ if(family == ir::FAMILY_QWORD) poolOffset += 2 * simdWidth / 8;
+ else poolOffset += simdWidth / 8;
regSet.push_back(regSlot);
}
}
if(!regSlot.isTmpReg) {
/* For temporary registers, we don't need to unspill. */
SelectionInstruction *spill = this->create(SEL_OP_SPILL_REG, 0, 1);
- spill->state = GenInstructionState(ctx.getSimdWidth());
+ spill->state = insn.state;//GenInstructionState(simdWidth);
+ spill->state.accWrEnable = 0;
+ spill->state.saturate = 0;
+ if (insn.opcode == SEL_OP_SEL)
+ spill->state.predicate = GEN_PREDICATE_NONE;
spill->src(0) = GenRegister(GEN_GENERAL_REGISTER_FILE,
registerPool + regSlot.poolOffset, 0,
selReg.type, selReg.vstride,
selReg.width, selReg.hstride);
- spill->extra.scratchOffset = regSlot.addr;
+ spill->extra.scratchOffset = regSlot.addr + selReg.quarter * 4 * simdWidth;
spill->extra.scratchMsgHeader = registerPool;
insn.append(*spill);
}
}
sel.pop();
-
// All children are marked as root
markAllChildren(dag);
return true;
#include "backend/gen_register.hpp"
#include "backend/program.hpp"
#include "sys/exception.hpp"
+#include "sys/cvar.hpp"
#include <algorithm>
#include <climits>
#include <iostream>
}
}
+ IVAR(OCL_SIMD16_SPILL_THRESHOLD, 0, 16, 256);
bool GenRegAllocator::Opaque::allocateGRFs(Selection &selection) {
// Perform the linear scan allocator
const uint32_t regNum = ctx.sel->getRegNum();
}
if (!spilledRegs.empty()) {
GBE_ASSERT(reservedReg != 0);
+ if (ctx.getSimdWidth() == 16) {
+ if (spilledRegs.size() > (unsigned int)OCL_SIMD16_SPILL_THRESHOLD) {
+ if (GBE_DEBUG)
+ std::cerr << "WARN: exceed simd 16 spill threshold ("
+ << spilledRegs.size() << ">" << OCL_SIMD16_SPILL_THRESHOLD
+ << ")" << std::endl;
+ return false;
+ }
+ }
allocateScratchForSpilled();
-
bool success = selection.spillRegs(spilledRegs, reservedReg);
if (!success) {
std::cerr << "Fail to spill registers." << std::endl;
uint32_t regSize;
ir::RegisterFamily family;
getRegAttrib(reg, regSize, &family);
-
- if ((regSize == GEN_REG_SIZE && family == ir::FAMILY_DWORD)
- || (regSize == 2*GEN_REG_SIZE && family == ir::FAMILY_QWORD)) {
+ // In simd16 mode, we may introduce some simd8 registers in the instruction selection stage.
+ // Spilling those simd8 temporary registers would introduce unnecessary complexity, so we simply
+ // avoid spilling those temporary registers here.
+ if (ctx.getSimdWidth() == 16 && reg.value() >= ctx.getFunction().getRegisterFile().regNum())
+ return;
+
+ if ((regSize == ctx.getSimdWidth()/8 * GEN_REG_SIZE && family == ir::FAMILY_DWORD)
+ || (regSize == 2 * ctx.getSimdWidth()/8 * GEN_REG_SIZE && family == ir::FAMILY_QWORD)) {
GBE_ASSERT(offsetReg.find(grfOffset) == offsetReg.end());
offsetReg.insert(std::make_pair(grfOffset, reg));
spillCandidate.insert(intervals[reg]);
bool isAllocated) {
if (reservedReg == 0)
return false;
+
+ if (interval.reg.value() >= ctx.getFunction().getRegisterFile().regNum() &&
+ ctx.getSimdWidth() == 16)
+ return false;
SpillRegTag spillTag;
spillTag.isTmpReg = interval.maxID == interval.minID;
spillTag.addr = -1;
return true;
}
+ // Check whether a vector which is allocated can be spilled out.
+ // If part of a vector has expired, the vector is currently unspillable.
+ // FIXME we may need to fix those unspillable vectors in the future.
INLINE bool GenRegAllocator::Opaque::vectorCanSpill(SelectionVector *vector) {
for(uint32_t id = 0; id < vector->regNum; id++)
- if (spillCandidate.find(intervals[(ir::Register)(vector->reg[id]).value.reg])
+ if (spillCandidate.find(intervals[(ir::Register)(vector->reg[id].value.reg)])
== spillCandidate.end())
return false;
return true;
// If there is no spill candidate or current register is spillable and current register's
// endpoint is after all the spillCandidate register's endpoint we return false. The
// caller will spill current register.
+ // In simd16 mode, we will always try to spill here rather than return to the caller.
+ // The reason is that the caller may have a vector to allocate, and some elements may be
+ // temporary registers which cannot be spilled.
if (it == spillCandidate.end()
- || (it->getMaxID() <= interval.maxID && alignment == GEN_REG_SIZE))
+ || (ctx.getSimdWidth() == 8 && (it->getMaxID() <= interval.maxID
+ && alignment == ctx.getSimdWidth()/8 * GEN_REG_SIZE)))
return false;
ir::Register reg = it->getReg();
spillSet.insert(vector->reg[id].reg());
reg = vector->reg[id].reg();
family = ctx.sel->getRegisterFamily(reg);
- size -= family == ir::FAMILY_QWORD ? 2*GEN_REG_SIZE : GEN_REG_SIZE;
+ size -= family == ir::FAMILY_QWORD ? 2 * GEN_REG_SIZE * ctx.getSimdWidth()/8
+ : GEN_REG_SIZE * ctx.getSimdWidth()/8;
}
} else if (!isVector) {
spillSet.insert(reg);
- size -= family == ir::FAMILY_QWORD ? 2*GEN_REG_SIZE : GEN_REG_SIZE;
+ size -= family == ir::FAMILY_QWORD ? 2 * GEN_REG_SIZE * ctx.getSimdWidth()/8
+ : GEN_REG_SIZE * ctx.getSimdWidth()/8;
} else
needRestart = true; // is a vector which could not be spilled.
break;
if (!needRestart) {
uint32_t offset = RA.find(reg)->second;
- uint32_t nextOffset = (family == ir::FAMILY_QWORD) ? (offset + 2*GEN_REG_SIZE) : (offset + GEN_REG_SIZE);
+ uint32_t nextOffset = (family == ir::FAMILY_QWORD) ? (offset + 2 * GEN_REG_SIZE * ctx.getSimdWidth() / 8)
+ : (offset + GEN_REG_SIZE * ctx.getSimdWidth() / 8);
auto nextRegIt = offsetReg.find(nextOffset);
if (nextRegIt != offsetReg.end())
reg = nextRegIt->second;
}
if (needRestart) {
+#if 0
+ // FIXME, we should enable this code block in the future.
+ // If the spill set is not empty and we need a restart, we can
+ // simply return and try to allocate the registers first,
+ // since some vectors which have expired elements may be marked
+ // as unspillable vectors.
+ if (spillSet.size() > 0)
+ break;
+#endif
+ it++;
// next register is not in spill candidate.
// let's move to next candidate and start over.
- it++;
if (it == spillCandidate.end())
return false;
reg = it->getReg();
reservedReg = ctx.allocate(RESERVED_REG_NUM_FOR_SPILL * GEN_REG_SIZE, GEN_REG_SIZE);
reservedReg /= GEN_REG_SIZE;
} else {
- reservedReg = 0;
+ reservedReg = ctx.allocate(RESERVED_REG_NUM_FOR_SPILL * GEN_REG_SIZE, GEN_REG_SIZE);
+ reservedReg /= GEN_REG_SIZE;
}
// schedulePreRegAllocation(ctx, selection);