}
});
#undef INSERT_REG
+ this->newCurbeEntry(GBE_CURBE_GLOBAL_CONSTANT_OFFSET, 0, sizeof(int));
+ specialRegs.insert(ir::ocl::constoffst);
+
+ // Insert serialized global constant arrays if used
+ const ir::ConstantSet& constantSet = unit.getConstantSet();
+ if (constantSet.getConstantNum()) {
+ size_t size = constantSet.getDataSize();
+ this->newCurbeEntry(GBE_CURBE_GLOBAL_CONSTANT_DATA, 0, size);
+ }
// Insert the number of threads
this->newCurbeEntry(GBE_CURBE_THREAD_NUM, 0, sizeof(uint32_t));
reg == ir::ocl::gsize2 ||
reg == ir::ocl::goffset0 ||
reg == ir::ocl::goffset1 ||
- reg == ir::ocl::goffset2)
+ reg == ir::ocl::goffset2 ||
+ reg == ir::ocl::constoffst)
return true;
return false;
}
virtual bool doInitialization(Module &M);
+ void collectGlobalConstant(void) const;
+
bool runOnFunction(Function &F) {
// Do not codegen any 'available_externally' functions at all, they have
// definitions outside the translation unit.
char GenWriter::ID = 0;
+ /*! Register every global array of the module that lives in the constant
+  *  address space with the unit (through unit.newConstant).
+  *
+  *  For each such global the initializer must be a ConstantDataArray. Its
+  *  elements are serialized one by one into a temporary buffer which is then
+  *  handed to unit.newConstant together with the name of the global. The
+  *  temporary buffer is released before moving on to the next global.
+  *
+  *  Supported element types are float, double and integer; any other element
+  *  type hits NOT_IMPLEMENTED.
+  *
+  *  NOTE(review): integer elements are always stored as a 32-bit `unsigned`,
+  *  whatever their declared width, so wider values are truncated by the cast
+  *  below. Confirm this is the layout the consumers of the constant set expect.
+  */
+ void GenWriter::collectGlobalConstant(void) const {
+   const Module::GlobalListType &globalList = TheModule->getGlobalList();
+   for (auto i = globalList.begin(); i != globalList.end(); ++i) {
+     const GlobalVariable &v = *i;
+     const unsigned addrSpace = v.getType()->getAddressSpace();
+     // Only globals placed in the constant address space are collected
+     if (addrSpace != ir::AddressSpace::MEM_CONSTANT)
+       continue;
+
+     GBE_ASSERT(v.hasInitializer());
+     const Constant *c = v.getInitializer();
+     GBE_ASSERT(c->getType()->getTypeID() == Type::ArrayTyID);
+     const ConstantDataArray *cda = dyn_cast<ConstantDataArray>(c);
+     GBE_ASSERT(cda);
+
+     // Take an owned copy of the name: StringRef::data() is not guaranteed to
+     // be null terminated, so it must not be used directly as a C string
+     const std::string name = v.getName().str();
+
+     const unsigned len = cda->getNumElements();
+     const Type::TypeID typeID = cda->getElementType()->getTypeID();
+     uint64_t elementSize = cda->getElementByteSize();
+     // Every integer element is serialized as one `unsigned`
+     if (typeID == Type::TypeID::IntegerTyID)
+       elementSize = sizeof(unsigned);
+
+     void *mem = malloc(elementSize * len);
+     // The loop below writes through mem unconditionally
+     GBE_ASSERT(mem != NULL);
+
+     for (unsigned j = 0; j < len; ++j) {
+       switch (typeID) {
+         case Type::TypeID::FloatTyID:
+         {
+           const float f = cda->getElementAsFloat(j);
+           memcpy(static_cast<float *>(mem) + j, &f, elementSize);
+         }
+         break;
+         case Type::TypeID::DoubleTyID:
+         {
+           const double d = cda->getElementAsDouble(j);
+           memcpy(static_cast<double *>(mem) + j, &d, elementSize);
+         }
+         break;
+         case Type::TypeID::IntegerTyID:
+         {
+           const unsigned u = static_cast<unsigned>(cda->getElementAsInteger(j));
+           memcpy(static_cast<unsigned *>(mem) + j, &u, elementSize);
+         }
+         break;
+         default:
+           NOT_IMPLEMENTED;
+       }
+     }
+
+     // Last argument is presumably the required alignment -- TODO confirm
+     // against the declaration of newConstant
+     unit.newConstant(static_cast<char *>(mem), name.c_str(), elementSize * len, sizeof(unsigned));
+     free(mem);
+   }
+ }
+
+ // Pass initialization hook: forwards to FunctionPass::doInitialization,
+ // records the module in TheModule and then collects the module's global
+ // constant arrays. Always returns false.
bool GenWriter::doInitialization(Module &M) {
FunctionPass::doInitialization(M);
// Initialize
TheModule = &M;
+ // Must come after TheModule is set: collectGlobalConstant() reads it
+ collectGlobalConstant();
return false;
}
}
ir::Register GenWriter::getRegister(Value *value, uint32_t elemID) {
+ if (dyn_cast<ConstantExpr>(value)) {
+ ConstantExpr *ce = dyn_cast<ConstantExpr>(value);
+ if(ce->isCast()) {
+ GBE_ASSERT(ce->getOpcode() == Instruction::PtrToInt);
+ const Value *pointer = ce->getOperand(0);
+ GBE_ASSERT(pointer->hasName());
+ auto name = pointer->getName().str();
+ uint16_t reg = unit.getConstantSet().getConstant(name).getReg();
+ return ir::Register(reg);
+ }
+ }
Constant *CPV = dyn_cast<Constant>(value);
if (CPV) {
GBE_ASSERT(isa<GlobalValue>(CPV) == false);
this->labelMap.clear();
this->emitFunctionPrototype(F);
+ // Allocate a virtual register for each global constant array
+ const Module::GlobalListType &globalList = TheModule->getGlobalList();
+ size_t j = 0;
+ for(auto i = globalList.begin(); i != globalList.end(); i ++) {
+ const GlobalVariable &v = *i;
+ unsigned addrSpace = v.getType()->getAddressSpace();
+ if(addrSpace != ir::AddressSpace::MEM_CONSTANT)
+ continue;
+ GBE_ASSERT(v.hasInitializer());
+ const Constant *c = v.getInitializer();
+ GBE_ASSERT(c->getType()->getTypeID() == Type::ArrayTyID);
+ const ConstantDataArray *cda = dyn_cast<ConstantDataArray>(c);
+ GBE_ASSERT(cda);
+ ir::Register reg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD);
+ ir::Constant &con = unit.getConstantSet().getConstant(j ++);
+ con.setReg(reg.value());
+ if(con.getOffset() != 0) {
+ ctx.LOADI(ir::TYPE_S32, reg, ctx.newIntegerImmediate(con.getOffset(), ir::TYPE_S32));
+ ctx.ADD(ir::TYPE_S32, reg, ir::ocl::constoffst, reg);
+ } else {
+ ctx.MOV(ir::TYPE_S32, reg, ir::ocl::constoffst);
+ }
+ }
+
// Visit all the instructions and emit the IR registers or the value to
// value mapping when a new register is not needed
pass = PASS_EMIT_REGISTERS;
UPLOAD(GBE_CURBE_GROUP_NUM_Y, global_wk_sz[1]/local_wk_sz[1]);
UPLOAD(GBE_CURBE_GROUP_NUM_Z, global_wk_sz[2]/local_wk_sz[2]);
UPLOAD(GBE_CURBE_THREAD_NUM, thread_n);
+ UPLOAD(GBE_CURBE_GLOBAL_CONSTANT_OFFSET, gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_GLOBAL_CONSTANT_DATA, 0) + 32);
#undef UPLOAD
/* Write identity for the stack pointer. This is required by the stack pointer
for (i = 0; i < (int32_t) simd_sz; ++i) stackptr[i] = i;
}
+ /* Write global constant arrays */
+ if ((offset = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_GLOBAL_CONSTANT_DATA, 0)) >= 0) {
+ /* Write the global constant arrays */
+ gbe_program prog = ker->program->opaque;
+ gbe_program_get_global_constant_data(prog, ker->curbe + offset);
+ }
+
/* Handle the various offsets to SLM */
const int32_t arg_n = gbe_kernel_get_arg_num(ker->opaque);
int32_t arg, slm_offset = 0;