this->kernel->stackSize = 1*KB; // XXX compute that in a better way
}
- void Context::newCurbeEntry(gbe_curbe_type value,
+ uint32_t Context::newCurbeEntry(gbe_curbe_type value,
uint32_t subValue,
uint32_t size,
uint32_t alignment)
GBE_ASSERT(offset >= GEN_REG_SIZE);
kernel->patches.push_back(PatchInfo(value, subValue, offset - GEN_REG_SIZE));
kernel->curbeSize = std::max(kernel->curbeSize, offset + size - GEN_REG_SIZE);
+ return offset;
}
uint32_t Context::getImageInfoCurbeOffset(ir::ImageInfoKey key, size_t size)
return offset;
}
+
+ void Context::insertCurbeReg(ir::Register reg, uint32_t offset) { // Records the curbe offset associated with register reg in curbeRegs
+ curbeRegs.insert(std::make_pair(reg, offset)); // NOTE(review): if curbeRegs is a std::map-like container, insert() keeps an already present entry for reg instead of replacing it - confirm
+ }
+
void Context::buildPatchList(void) {
const uint32_t ptrSize = unit.getPointerSize() == ir::POINTER_32_BITS ? 4u : 8u;
kernel->curbeSize = 0u;
// We insert the block IP mask first
- this->newCurbeEntry(GBE_CURBE_BLOCK_IP, 0, this->simdWidth*sizeof(uint16_t));
+ this->insertCurbeReg(ir::ocl::blockip, this->newCurbeEntry(GBE_CURBE_BLOCK_IP, 0, this->simdWidth*sizeof(uint16_t)));
// Go over the arguments and find the related patch locations
const uint32_t argNum = fn.argNum();
arg.type == ir::FunctionArgument::STRUCTURE ||
arg.type == ir::FunctionArgument::IMAGE ||
arg.type == ir::FunctionArgument::SAMPLER)
- this->newCurbeEntry(GBE_CURBE_KERNEL_ARGUMENT, argID, arg.size, ptrSize);
+ this->insertCurbeReg(arg.reg, this->newCurbeEntry(GBE_CURBE_KERNEL_ARGUMENT, argID, arg.size, ptrSize));
}
// Already inserted registers go here
- set<ir::Register> specialRegs;
-
const size_t localIDSize = sizeof(uint32_t) * this->simdWidth;
- this->newCurbeEntry(GBE_CURBE_LOCAL_ID_X, 0, localIDSize);
- this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Y, 0, localIDSize);
- this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Z, 0, localIDSize);
- this->newCurbeEntry(GBE_CURBE_SAMPLER_INFO, 0, 32);
- specialRegs.insert(ir::ocl::lid0);
- specialRegs.insert(ir::ocl::lid1);
- specialRegs.insert(ir::ocl::lid2);
- specialRegs.insert(ir::ocl::samplerinfo);
+ insertCurbeReg(ir::ocl::lid0, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_X, 0, localIDSize));
+ insertCurbeReg(ir::ocl::lid1, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Y, 0, localIDSize));
+ insertCurbeReg(ir::ocl::lid2, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Z, 0, localIDSize));
+ insertCurbeReg(ir::ocl::samplerinfo, this->newCurbeEntry(GBE_CURBE_SAMPLER_INFO, 0, 32));
// Go over all the instructions and find the special register we need
// to push
#define INSERT_REG(SPECIAL_REG, PATCH, WIDTH) \
if (reg == ir::ocl::SPECIAL_REG) { \
- if (specialRegs.find(reg) != specialRegs.end()) continue; \
- this->newCurbeEntry(GBE_CURBE_##PATCH, 0, ptrSize * WIDTH); \
+ if (curbeRegs.find(reg) != curbeRegs.end()) continue; \
+ insertCurbeReg(reg, this->newCurbeEntry(GBE_CURBE_##PATCH, 0, ptrSize * WIDTH)); \
} else
bool useStackPtr = false;
for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
const ir::Register reg = insn.getSrc(srcID);
if (fn.isSpecialReg(reg) == false) continue;
- if (specialRegs.contains(reg) == true) continue;
+ if (curbeRegs.find(reg) != curbeRegs.end()) continue;
if (reg == ir::ocl::stackptr) useStackPtr = true;
INSERT_REG(lsize0, LOCAL_SIZE_X, 1)
INSERT_REG(lsize1, LOCAL_SIZE_Y, 1)
INSERT_REG(numgroup2, GROUP_NUM_Z, 1)
INSERT_REG(stackptr, STACK_POINTER, this->simdWidth)
do {} while (0);
- specialRegs.insert(reg);
}
});
#undef INSERT_REG
// Insert the number of threads
- this->newCurbeEntry(GBE_CURBE_THREAD_NUM, 0, sizeof(uint32_t));
+ insertCurbeReg(ir::ocl::threadn, this->newCurbeEntry(GBE_CURBE_THREAD_NUM, 0, sizeof(uint32_t)));
// Insert the stack buffer if used
if (useStackPtr)
- this->newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER, ptrSize);
+ insertCurbeReg(ir::ocl::stackptr, this->newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER, ptrSize));
// After this point the vector is immutable. Sorting it will make
// research faster
void allocateFlags(Selection &selection);
/*! Allocate the GRF registers */
bool allocateGRFs(Selection &selection);
+ /*! Create gen registers for all preallocated curbe registers. */
+ void allocatePayloadRegs(void);
/*! Create a Gen register from a register set in the payload */
- void allocatePayloadReg(gbe_curbe_type, ir::Register, uint32_t subValue = 0, uint32_t subOffset = 0);
+ void allocatePayloadReg(ir::Register, uint32_t offset, uint32_t subOffset = 0);
/*! Create the intervals for each register */
/*! Allocate the vectors detected in the instruction selection pass */
void allocateVector(Selection &selection);
GenRegAllocator::Opaque::Opaque(GenContext &ctx) : ctx(ctx) {}
GenRegAllocator::Opaque::~Opaque(void) {}
- void GenRegAllocator::Opaque::allocatePayloadReg(gbe_curbe_type value,
- ir::Register reg,
- uint32_t subValue,
+ void GenRegAllocator::Opaque::allocatePayloadReg(ir::Register reg, // Maps reg to payload location (offset + subOffset) in RA and resets its interval
+ uint32_t offset, // base offset; asserted below to be at least GEN_REG_SIZE
uint32_t subOffset)
{
using namespace ir;
- const Kernel *kernel = ctx.getKernel();
- const int32_t curbeOffset = kernel->getCurbeOffset(value, subValue);
- if (curbeOffset >= 0) {
- const uint32_t offset = GEN_REG_SIZE + curbeOffset + subOffset;
- RA.insert(std::make_pair(reg, offset));
- this->intervals[reg].minID = 0;
- this->intervals[reg].maxID = 0;
+ assert(offset >= GEN_REG_SIZE); // NOTE(review): plain assert here, while the next check in this function uses GBE_ASSERT
+ offset += subOffset; // subOffset is applied after the base offset has been checked
+ RA.insert(std::make_pair(reg, offset));
+ GBE_ASSERT(reg != ocl::blockip || (offset % GEN_REG_SIZE == 0)); // the block IP register must land on a multiple of GEN_REG_SIZE
+ this->intervals[reg].minID = 0; // minID and maxID of the register's interval are both set to 0
+ this->intervals[reg].maxID = 0;
+ }
+
+ INLINE void GenRegAllocator::Opaque::allocatePayloadRegs(void) { // Calls allocatePayloadReg for every entry of ctx.curbeRegs, then for every entry of the function's push map
+ using namespace ir;
+ for(auto &it : this->ctx.curbeRegs)
+ allocatePayloadReg(it.first, it.second); // it.first is the register, it.second its recorded offset; subOffset uses its default value
+
+ // Allocate all pushed registers (i.e. structure kernel arguments)
+ const Function &fn = ctx.getFunction();
+ GBE_ASSERT(fn.getProfile() == PROFILE_OCL);
+ const Function::PushMap &pushMap = fn.getPushMap();
+ for (const auto &pushed : pushMap) {
+ const uint32_t argID = pushed.second.argID;
+ const FunctionArgument arg = fn.getArg(argID); // NOTE(review): held by value rather than by reference - confirm the copy is intended
+
+ const uint32_t subOffset = pushed.second.offset;
+ const Register reg = pushed.second.getRegister();
+ auto it = this->ctx.curbeRegs.find(arg.reg); // look up the offset recorded for the argument's own register
+ assert(it != ctx.curbeRegs.end()); // the argument's register is expected to be present in curbeRegs at this point
+ allocatePayloadReg(reg, it->second, subOffset); // pushed register is placed at the argument's offset plus pushed.second.offset
}
}
}
return true;
}
+
INLINE bool GenRegAllocator::Opaque::allocate(Selection &selection) {
using namespace ir;
- const Kernel *kernel = ctx.getKernel();
- const Function &fn = ctx.getFunction();
- GBE_ASSERT(fn.getProfile() == PROFILE_OCL);
if (ctx.getSimdWidth() == 8) {
reservedReg = ctx.allocate(RESERVED_REG_NUM_FOR_SPILL * GEN_REG_SIZE, GEN_REG_SIZE);
reservedReg /= GEN_REG_SIZE;
this->intervals.push_back(ir::Register(regID));
// Allocate the special registers (only those which are actually used)
- allocatePayloadReg(GBE_CURBE_LOCAL_ID_X, ocl::lid0);
- allocatePayloadReg(GBE_CURBE_LOCAL_ID_Y, ocl::lid1);
- allocatePayloadReg(GBE_CURBE_LOCAL_ID_Z, ocl::lid2);
- allocatePayloadReg(GBE_CURBE_LOCAL_SIZE_X, ocl::lsize0);
- allocatePayloadReg(GBE_CURBE_LOCAL_SIZE_Y, ocl::lsize1);
- allocatePayloadReg(GBE_CURBE_LOCAL_SIZE_Z, ocl::lsize2);
- allocatePayloadReg(GBE_CURBE_GLOBAL_SIZE_X, ocl::gsize0);
- allocatePayloadReg(GBE_CURBE_GLOBAL_SIZE_Y, ocl::gsize1);
- allocatePayloadReg(GBE_CURBE_GLOBAL_SIZE_Z, ocl::gsize2);
- allocatePayloadReg(GBE_CURBE_GLOBAL_OFFSET_X, ocl::goffset0);
- allocatePayloadReg(GBE_CURBE_GLOBAL_OFFSET_Y, ocl::goffset1);
- allocatePayloadReg(GBE_CURBE_GLOBAL_OFFSET_Z, ocl::goffset2);
- allocatePayloadReg(GBE_CURBE_WORK_DIM, ocl::workdim);
- allocatePayloadReg(GBE_CURBE_SAMPLER_INFO, ocl::samplerinfo);
- allocatePayloadReg(GBE_CURBE_GROUP_NUM_X, ocl::numgroup0);
- allocatePayloadReg(GBE_CURBE_GROUP_NUM_Y, ocl::numgroup1);
- allocatePayloadReg(GBE_CURBE_GROUP_NUM_Z, ocl::numgroup2);
- allocatePayloadReg(GBE_CURBE_STACK_POINTER, ocl::stackptr);
- allocatePayloadReg(GBE_CURBE_THREAD_NUM, ocl::threadn);
+ this->allocatePayloadRegs();
// Group and barrier IDs are always allocated by the hardware in r0
RA.insert(std::make_pair(ocl::groupid0, 1*sizeof(float))); // r0.1
RA.insert(std::make_pair(ocl::barrierid, 2*sizeof(float))); // r0.2
// block IP used to handle the mask in SW is always allocated
- const int32_t blockIPOffset = GEN_REG_SIZE + kernel->getCurbeOffset(GBE_CURBE_BLOCK_IP,0);
- GBE_ASSERT(blockIPOffset >= 0 && blockIPOffset % GEN_REG_SIZE == 0);
- RA.insert(std::make_pair(ocl::blockip, blockIPOffset));
- this->intervals[ocl::blockip].minID = 0;
-
- // Allocate all (non-structure) argument parameters
- const uint32_t argNum = fn.argNum();
- for (uint32_t argID = 0; argID < argNum; ++argID) {
- const FunctionArgument &arg = fn.getArg(argID);
- GBE_ASSERT(arg.type == FunctionArgument::GLOBAL_POINTER ||
- arg.type == FunctionArgument::CONSTANT_POINTER ||
- arg.type == FunctionArgument::LOCAL_POINTER ||
- arg.type == FunctionArgument::VALUE ||
- arg.type == FunctionArgument::STRUCTURE ||
- arg.type == FunctionArgument::IMAGE ||
- arg.type == FunctionArgument::SAMPLER);
- allocatePayloadReg(GBE_CURBE_KERNEL_ARGUMENT, arg.reg, argID);
- }
-
- // Allocate all pushed registers (i.e. structure kernel arguments)
- const Function::PushMap &pushMap = fn.getPushMap();
- for (const auto &pushed : pushMap) {
- const uint32_t argID = pushed.second.argID;
- const uint32_t subOffset = pushed.second.offset;
- const Register reg = pushed.second.getRegister();
- allocatePayloadReg(GBE_CURBE_KERNEL_ARGUMENT, reg, argID, subOffset);
- }
// Compute the intervals
int32_t insnID = 0;