From 4c713434c54a9edafbf5c5e9a830fd00cbf7db0d Mon Sep 17 00:00:00 2001 From: Ruiling Song Date: Wed, 30 Jul 2014 13:59:30 +0800 Subject: [PATCH] GBE: Handle bti allocation for internal buffer used by printf. 1. Move the bti/Register map from gbe::Context to ir::Function. 2. use GlobalVariable instead of 'call' to get internal buffer (used for printf) base address. Signed-off-by: Ruiling Song Reviewed-by: Zhigang Gong --- backend/src/backend/context.cpp | 8 +------- backend/src/backend/program.cpp | 16 ++++++++++++++++ backend/src/backend/program.h | 7 +++++++ backend/src/backend/program.hpp | 9 +++++++++ backend/src/gbe_bin_interpreter.cpp | 2 ++ backend/src/ir/context.hpp | 1 + backend/src/ir/function.cpp | 9 +++++++++ backend/src/ir/function.hpp | 4 ++++ backend/src/ir/printf.hpp | 9 +++++++++ backend/src/llvm/llvm_gen_backend.cpp | 18 ++++++++++++++++-- backend/src/llvm/llvm_printf_parser.cpp | 17 ++++++++--------- src/cl_command_queue_gen7.c | 4 ++-- src/cl_driver.h | 2 +- src/cl_gbe_loader.cpp | 10 ++++++++++ src/cl_gbe_loader.h | 2 ++ src/intel/intel_gpgpu.c | 4 ++-- 16 files changed, 99 insertions(+), 23 deletions(-) diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp index a9d5d4a..6a2d70b 100644 --- a/backend/src/backend/context.cpp +++ b/backend/src/backend/context.cpp @@ -437,10 +437,7 @@ namespace gbe curbeRegs.insert(std::make_pair(reg, offset)); } ir::Register Context::getSurfaceBaseReg(unsigned char bti) { - map::iterator iter; - iter = btiRegMap.find(bti); - GBE_ASSERT(iter != btiRegMap.end()); - return iter->second; + return fn.getSurfaceBaseReg(bti); } void Context::buildArgList(void) { @@ -449,8 +446,6 @@ namespace gbe kernel->args = GBE_NEW_ARRAY_NO_ARG(KernelArgument, kernel->argNum); else kernel->args = NULL; - btiRegMap.clear(); - btiRegMap.insert(std::make_pair(1, ir::ocl::stackbuffer)); for (uint32_t argID = 0; argID < kernel->argNum; ++argID) { const auto &arg = fn.getArg(argID); @@ -466,7 +461,6 @@ namespace gbe kernel->args[argID].type = GBE_ARG_GLOBAL_PTR; kernel->args[argID].size = sizeof(void*); kernel->args[argID].bti = arg.bti; - btiRegMap.insert(std::make_pair(arg.bti, arg.reg)); break; case ir::FunctionArgument::CONSTANT_POINTER: kernel->args[argID].type = GBE_ARG_CONSTANT_PTR; diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index 20c1807..c7570de 100644 --- a/backend/src/backend/program.cpp +++ b/backend/src/backend/program.cpp @@ -1099,6 +1099,18 @@ namespace gbe { return kernel->dupPrintfSet(); } + static uint8_t kernelGetPrintfBufBTI(void * printf_info) { + if (printf_info == NULL) return 0; + const ir::PrintfSet *ps = (ir::PrintfSet *)printf_info; + return ps->getBufBTI(); + } + + static uint8_t kernelGetPrintfIndexBufBTI(void * printf_info) { + if (printf_info == NULL) return 0; + const ir::PrintfSet *ps = (ir::PrintfSet *)printf_info; + return ps->getIndexBufBTI(); + } + static void kernelReleasePrintfSet(void * printf_info) { if (printf_info == NULL) return; ir::PrintfSet *ps = (ir::PrintfSet *)printf_info; @@ -1207,6 +1219,8 @@ GBE_EXPORT_SYMBOL gbe_set_image_base_index_cb *gbe_set_image_base_index = NULL; GBE_EXPORT_SYMBOL gbe_get_image_base_index_cb *gbe_get_image_base_index = NULL; GBE_EXPORT_SYMBOL gbe_get_printf_num_cb *gbe_get_printf_num = NULL; GBE_EXPORT_SYMBOL gbe_dup_printfset_cb *gbe_dup_printfset = NULL; +GBE_EXPORT_SYMBOL gbe_get_printf_buf_bti_cb *gbe_get_printf_buf_bti = NULL; +GBE_EXPORT_SYMBOL gbe_get_printf_indexbuf_bti_cb *gbe_get_printf_indexbuf_bti = NULL; GBE_EXPORT_SYMBOL gbe_release_printf_info_cb *gbe_release_printf_info = NULL; GBE_EXPORT_SYMBOL gbe_get_printf_sizeof_size_cb *gbe_get_printf_sizeof_size = NULL; GBE_EXPORT_SYMBOL gbe_output_printf_cb *gbe_output_printf = NULL; @@ -1253,6 +1267,8 @@ namespace gbe gbe_get_image_base_index = gbe::getImageBaseIndex; gbe_set_image_base_index = gbe::setImageBaseIndex; gbe_get_printf_num = gbe::kernelGetPrintfNum; + gbe_get_printf_buf_bti = gbe::kernelGetPrintfBufBTI; + gbe_get_printf_indexbuf_bti = gbe::kernelGetPrintfIndexBufBTI; gbe_dup_printfset = gbe::kernelDupPrintfSet; gbe_get_printf_sizeof_size = gbe::kernelGetPrintfSizeOfSize; gbe_release_printf_info = gbe::kernelReleasePrintfSet; diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h index 330a3de..39ff402 100644 --- a/backend/src/backend/program.h +++ b/backend/src/backend/program.h @@ -133,6 +133,13 @@ extern gbe_kernel_get_image_data_cb *gbe_kernel_get_image_data; typedef uint32_t (gbe_get_printf_num_cb)(void* printf_info); extern gbe_get_printf_num_cb *gbe_get_printf_num; +/*! Get the printf buffer bti */ +typedef uint8_t (gbe_get_printf_buf_bti_cb)(void* printf_info); +extern gbe_get_printf_buf_bti_cb *gbe_get_printf_buf_bti; + +typedef uint8_t (gbe_get_printf_indexbuf_bti_cb)(void* printf_info); +extern gbe_get_printf_indexbuf_bti_cb *gbe_get_printf_indexbuf_bti; + /*! Release the printfset */ typedef void (gbe_release_printf_info_cb)(void* printf_info); extern gbe_release_printf_info_cb *gbe_release_printf_info; diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp index a6303b9..8f5f125 100644 --- a/backend/src/backend/program.hpp +++ b/backend/src/backend/program.hpp @@ -145,6 +145,15 @@ namespace gbe { void* ptr = printfSet ? (void *)(new ir::PrintfSet(*printfSet)) : NULL; return ptr; } + uint8_t getPrintfBufBTI() const { + GBE_ASSERT(printfSet); + return printfSet->getBufBTI(); + } + + uint8_t getPrintfIndexBufBTI() const { + GBE_ASSERT(printfSet); + return printfSet->getIndexBufBTI(); + } void outputPrintf(void* index_addr, void* buf_addr, size_t global_wk_sz0, size_t global_wk_sz1, size_t global_wk_sz2) { diff --git a/backend/src/gbe_bin_interpreter.cpp b/backend/src/gbe_bin_interpreter.cpp index 2f02b34..ffca1f5 100644 --- a/backend/src/gbe_bin_interpreter.cpp +++ b/backend/src/gbe_bin_interpreter.cpp @@ -66,6 +66,8 @@ struct BinInterpCallBackInitializer gbe_get_image_base_index = gbe::getImageBaseIndex; gbe_set_image_base_index = gbe::setImageBaseIndex; gbe_get_printf_num = gbe::kernelGetPrintfNum; + gbe_get_printf_buf_bti = gbe::kernelGetPrintfBufBTI; + gbe_get_printf_indexbuf_bti = gbe::kernelGetPrintfIndexBufBTI; gbe_dup_printfset = gbe::kernelDupPrintfSet; gbe_get_printf_sizeof_size = gbe::kernelGetPrintfSizeOfSize; gbe_release_printf_info = gbe::kernelReleasePrintfSet; diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp index 8718a86..f3f4b50 100644 --- a/backend/src/ir/context.hpp +++ b/backend/src/ir/context.hpp @@ -180,6 +180,7 @@ namespace ir { GBE_ASSERT(valueNum > 0); this->STORE(type, index, offset, space, valueNum, dwAligned, bti); } + void appendSurface(uint8_t bti, Register reg) { fn->appendSurface(bti, reg); } protected: /*! A block must be started with a label */ diff --git a/backend/src/ir/function.cpp b/backend/src/ir/function.cpp index 519a70b..6c6e576 100644 --- a/backend/src/ir/function.cpp +++ b/backend/src/ir/function.cpp @@ -220,6 +220,15 @@ namespace ir { const uint32_t specialNum = this->getSpecialRegNum(); return ID >= firstID && ID < firstID + specialNum; } + Register Function::getSurfaceBaseReg(uint8_t bti) const { + map::const_iterator iter = btiRegMap.find(bti); + GBE_ASSERT(iter != btiRegMap.end()); + return iter->second; + } + + void Function::appendSurface(uint8_t bti, Register reg) { + btiRegMap.insert(std::make_pair(bti, reg)); + } void Function::computeCFG(void) { // Clear possible previously computed CFG and compute the direct diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp index deb7552..21e1a66 100644 --- a/backend/src/ir/function.hpp +++ b/backend/src/ir/function.hpp @@ -414,6 +414,9 @@ namespace ir { void addLoop(const vector &bbs, const vector> &exits); INLINE const vector &getLoops() { return loops; } vector &getBlocks() { return blocks; } + /*! Get surface starting address register from bti */ + Register getSurfaceBaseReg(uint8_t bti) const; + void appendSurface(uint8_t bti, Register reg); private: friend class Context; //!< Can freely modify a function std::string name; //!< Function name @@ -424,6 +427,7 @@ namespace ir { vector immediates; //!< All immediate values in the function vector blocks; //!< All chained basic blocks vector loops; //!< Loops info of the function + map btiRegMap;//!< map bti to surface base address RegisterFile file; //!< RegisterDatas used by the instructions Profile profile; //!< Current function profile PushMap pushMap; //!< Pushed function arguments (reg->loc) diff --git a/backend/src/ir/printf.hpp b/backend/src/ir/printf.hpp index 1aef767..4db7245 100644 --- a/backend/src/ir/printf.hpp +++ b/backend/src/ir/printf.hpp @@ -155,6 +155,8 @@ namespace gbe } sizeOfSize = other.sizeOfSize; + btiBuf = other.btiBuf; + btiIndexBuf = other.btiIndexBuf; } PrintfSet(void) = default; @@ -180,6 +182,11 @@ namespace gbe return sizeOfSize; } + void setBufBTI(uint8_t b) { btiBuf = b; } + void setIndexBufBTI(uint8_t b) { btiIndexBuf = b; } + uint8_t getBufBTI() const { return btiBuf; } + uint8_t getIndexBufBTI() const { return btiIndexBuf; } + uint32_t getPrintfBufferElementSize(uint32_t i) { PrintfSlot* slot = slots[i]; int vec_num = 1; @@ -226,6 +233,8 @@ namespace gbe vector slots; uint32_t sizeOfSize; // Total sizeof size. friend struct LockOutput; + uint8_t btiBuf; + uint8_t btiIndexBuf; static pthread_mutex_t lock; GBE_CLASS(PrintfSet); }; diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 2ab12a4..4633b6b 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -1164,6 +1164,7 @@ namespace gbe argNameNode = attrNode; } } + ctx.appendSurface(1, ir::ocl::stackbuffer); ctx.getFunction().setCompileWorkGroupSize(reqd_wg_sz[0], reqd_wg_sz[1], reqd_wg_sz[2]); // Loop over the arguments and output registers for them @@ -1238,6 +1239,7 @@ namespace gbe switch (addrSpace) { case ir::MEM_GLOBAL: globalPointer.insert(std::make_pair(I, btiBase)); + ctx.appendSurface(btiBase, reg); ctx.input(argName, ir::FunctionArgument::GLOBAL_POINTER, reg, llvmInfo, ptrSize, align, btiBase); btiBase++; break; @@ -1593,7 +1595,19 @@ namespace gbe GBE_ASSERT(con.getName() == v.getName()); ctx.LOADI(ir::TYPE_S32, reg, ctx.newIntegerImmediate(con.getOffset(), ir::TYPE_S32)); } else { - GBE_ASSERT(0); + if(v.getName().equals(StringRef("__gen_ocl_printf_buf"))) { + ctx.appendSurface(btiBase, ir::ocl::printfbptr); + ctx.getFunction().getPrintfSet()->setBufBTI(btiBase); + globalPointer.insert(std::make_pair(&v, btiBase++)); + regTranslator.newScalarProxy(ir::ocl::printfbptr, const_cast(&v)); + } else if(v.getName().equals(StringRef("__gen_ocl_printf_index_buf"))) { + ctx.appendSurface(btiBase, ir::ocl::printfiptr); + ctx.getFunction().getPrintfSet()->setIndexBufBTI(btiBase); + globalPointer.insert(std::make_pair(&v, btiBase++)); + regTranslator.newScalarProxy(ir::ocl::printfiptr, const_cast(&v)); + } else { + GBE_ASSERT(0); + } } } @@ -3251,7 +3265,7 @@ handle_write_image: bool isPrivate = false; p = candidates[idx]; - while (dyn_cast(p)) { + while (dyn_cast(p) && !dyn_cast(p)) { if (processed.find(p) == processed.end()) { processed.insert(p); diff --git a/backend/src/llvm/llvm_printf_parser.cpp b/backend/src/llvm/llvm_printf_parser.cpp index 384d36f..e02f5aa 100644 --- a/backend/src/llvm/llvm_printf_parser.cpp +++ b/backend/src/llvm/llvm_printf_parser.cpp @@ -467,7 +467,7 @@ error: // (index_offset + offset)* sizeof(int) op0 = builder->CreateMul(op0, ConstantInt::get(intTy, sizeof(int))); // Final index address = index_buf_ptr + (index_offset + offset)* sizeof(int) - op0 = builder->CreateAdd(op0, index_buf_ptr); + op0 = builder->CreateAdd(index_buf_ptr, op0); Value* index_addr = builder->CreateIntToPtr(op0, Type::getInt32PtrTy(module->getContext(), 1)); builder->CreateStore(ConstantInt::get(intTy, 1), index_addr);// The flag @@ -507,7 +507,7 @@ error: //offset * sizeof(specify) val = builder->CreateMul(offset, ConstantInt::get(intTy, sizeof_size)); //data_offset + pbuf_ptr - op0 = builder->CreateAdd(op0, pbuf_ptr); + op0 = builder->CreateAdd(pbuf_ptr, op0); op0 = builder->CreateAdd(op0, val); data_addr = builder->CreateIntToPtr(op0, dst_type); builder->CreateStore(out_arg, data_addr); @@ -575,15 +575,14 @@ error: if (!pbuf_ptr) { /* alloc a new buffer ptr to collect the print output. */ - pbuf_ptr = builder->CreateCall(cast(module->getOrInsertFunction( - "__gen_ocl_printf_get_buf_addr", Type::getInt32Ty(module->getContext()), - NULL))); + Type *ptrTy = Type::getInt32PtrTy(module->getContext()); + llvm::Constant * pBuf = module->getOrInsertGlobal(StringRef("__gen_ocl_printf_buf"), ptrTy); + pbuf_ptr = builder->CreatePtrToInt(pBuf, Type::getInt32Ty(module->getContext())); } if (!index_buf_ptr) { - /* alloc a new buffer ptr to collect the print valid index. */ - index_buf_ptr = builder->CreateCall(cast(module->getOrInsertFunction( - "__gen_ocl_printf_get_index_buf_addr", Type::getInt32Ty(module->getContext()), - NULL))); + Type *ptrTy = Type::getInt32PtrTy(module->getContext()); + llvm::Constant * pBuf = module->getOrInsertGlobal(StringRef("__gen_ocl_printf_index_buf"), ptrTy); + index_buf_ptr = builder->CreatePtrToInt(pBuf, Type::getInt32Ty(module->getContext())); } deadprintfs.push_back(PrintfInst(cast(call),parseOnePrintfInstruction(call))); diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c index a4c8af7..330f0f9 100644 --- a/src/cl_command_queue_gen7.c +++ b/src/cl_command_queue_gen7.c @@ -264,7 +264,7 @@ cl_bind_printf(cl_gpgpu gpgpu, cl_kernel ker, void* printf_info, int printf_num, int32_t offset = interp_kernel_get_curbe_offset(ker->opaque, value, 0); size_t buf_size = global_sz * sizeof(int) * printf_num; if (offset > 0) { - if (cl_gpgpu_set_printf_buffer(gpgpu, 0, buf_size, offset) != 0) + if (cl_gpgpu_set_printf_buffer(gpgpu, 0, buf_size, offset, interp_get_printf_indexbuf_bti(printf_info)) != 0) return -1; } @@ -272,7 +272,7 @@ cl_bind_printf(cl_gpgpu gpgpu, cl_kernel ker, void* printf_info, int printf_num, offset = interp_kernel_get_curbe_offset(ker->opaque, value, 0); buf_size = interp_get_printf_sizeof_size(printf_info) * global_sz; if (offset > 0) { - if (cl_gpgpu_set_printf_buffer(gpgpu, 1, buf_size, offset) != 0) + if (cl_gpgpu_set_printf_buffer(gpgpu, 1, buf_size, offset, interp_get_printf_buf_bti(printf_info)) != 0) return -1; } return 0; diff --git a/src/cl_driver.h b/src/cl_driver.h index 461c11e..9cdba98 100644 --- a/src/cl_driver.h +++ b/src/cl_driver.h @@ -226,7 +226,7 @@ typedef void (cl_gpgpu_unref_batch_buf_cb)(void*); extern cl_gpgpu_unref_batch_buf_cb *cl_gpgpu_unref_batch_buf; /* Set the printf buffer */ -typedef int (cl_gpgpu_set_printf_buffer_cb)(cl_gpgpu, uint32_t, uint32_t, uint32_t); +typedef int (cl_gpgpu_set_printf_buffer_cb)(cl_gpgpu, uint32_t, uint32_t, uint32_t, uint8_t); extern cl_gpgpu_set_printf_buffer_cb *cl_gpgpu_set_printf_buffer; /* get the printf buffer offset in the apeture*/ diff --git a/src/cl_gbe_loader.cpp b/src/cl_gbe_loader.cpp index 5f2f9ce..9d609c7 100644 --- a/src/cl_gbe_loader.cpp +++ b/src/cl_gbe_loader.cpp @@ -66,6 +66,8 @@ gbe_kernel_get_image_data_cb *interp_kernel_get_image_data = NULL; gbe_set_image_base_index_cb *interp_set_image_base_index = NULL; gbe_get_image_base_index_cb *interp_get_image_base_index = NULL; gbe_get_printf_num_cb* interp_get_printf_num = NULL; +gbe_get_printf_buf_bti_cb* interp_get_printf_buf_bti = NULL; +gbe_get_printf_indexbuf_bti_cb* interp_get_printf_indexbuf_bti = NULL; gbe_dup_printfset_cb* interp_dup_printfset = NULL; gbe_get_printf_sizeof_size_cb* interp_get_printf_sizeof_size = NULL; gbe_release_printf_info_cb* interp_release_printf_info = NULL; @@ -220,6 +222,14 @@ struct GbeLoaderInitializer if (interp_get_printf_num == NULL) return false; + interp_get_printf_buf_bti = *(gbe_get_printf_buf_bti_cb**)dlsym(dlhInterp, "gbe_get_printf_buf_bti"); + if (interp_get_printf_buf_bti == NULL) + return false; + + interp_get_printf_indexbuf_bti = *(gbe_get_printf_indexbuf_bti_cb**)dlsym(dlhInterp, "gbe_get_printf_indexbuf_bti"); + if (interp_get_printf_indexbuf_bti == NULL) + return false; + interp_dup_printfset = *(gbe_dup_printfset_cb**)dlsym(dlhInterp, "gbe_dup_printfset"); if (interp_dup_printfset == NULL) return false; diff --git a/src/cl_gbe_loader.h b/src/cl_gbe_loader.h index d095240..6cbc99e 100644 --- a/src/cl_gbe_loader.h +++ b/src/cl_gbe_loader.h @@ -66,6 +66,8 @@ extern gbe_kernel_get_image_data_cb *interp_kernel_get_image_data; extern gbe_set_image_base_index_cb *interp_set_image_base_index; extern gbe_get_image_base_index_cb *interp_get_image_base_index; extern gbe_get_printf_num_cb* interp_get_printf_num; +extern gbe_get_printf_buf_bti_cb* interp_get_printf_buf_bti; +extern gbe_get_printf_indexbuf_bti_cb* interp_get_printf_indexbuf_bti; extern gbe_dup_printfset_cb* interp_dup_printfset; extern gbe_get_printf_sizeof_size_cb* interp_get_printf_sizeof_size; extern gbe_release_printf_info_cb* interp_release_printf_info; diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index e0c00ea..1382ce6 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -1353,7 +1353,7 @@ intel_gpgpu_event_get_exec_timestamp(intel_gpgpu_t* gpgpu, intel_event_t *event, } static int -intel_gpgpu_set_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i, uint32_t size, uint32_t offset) +intel_gpgpu_set_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i, uint32_t size, uint32_t offset, uint8_t bti) { drm_intel_bo *bo = NULL; if (i == 0) { // the index buffer. @@ -1378,7 +1378,7 @@ intel_gpgpu_set_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i, uint32_t size, uint } memset(bo->virtual, 0, size); drm_intel_bo_unmap(bo); - intel_gpgpu_bind_buf(gpgpu, bo, offset, 0, size, 0); + intel_gpgpu_bind_buf(gpgpu, bo, offset, 0, size, bti); return 0; } -- 2.7.4