From fded6034dfcc9daa2bbbe0002b7ace844a767bb5 Mon Sep 17 00:00:00 2001 From: Benjamin Segovia Date: Wed, 25 Apr 2012 20:30:48 +0000 Subject: [PATCH] Simplify the way we handle curbe --- backend/src/backend/context.cpp | 60 ++++++++++++++++++++++--------------- backend/src/backend/gen_context.cpp | 15 ---------- backend/src/backend/gen_eu.hpp | 48 ++++++++++++++--------------- backend/src/backend/program.h | 3 +- backend/src/backend/sim_context.cpp | 3 +- backend/src/backend/sim_context.hpp | 2 ++ 6 files changed, 66 insertions(+), 65 deletions(-) diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp index 5fa0260..85f3e92 100644 --- a/backend/src/backend/context.cpp +++ b/backend/src/backend/context.cpp @@ -34,7 +34,6 @@ namespace gbe { - IVAR(OCL_SIMD_WIDTH, 8, 16, 32); Context::Context(const ir::Unit &unit, const std::string &name) : @@ -76,18 +75,42 @@ namespace gbe } } + // Already inserted registers go here + set<ir::Register> specialRegs; + + // We insert the block IP mask first + kernel->patches.push_back(PatchInfo(GBE_CURBE_BLOCK_IP, 0, kernel->curbeSize)); + kernel->curbeSize += this->simdWidth * sizeof(uint16_t); + + // Then the local IDs (not scalar, so we align them properly) + kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE); + if (this->simdWidth == 16 || this->simdWidth == 32) + if ((kernel->curbeSize + GEN_REG_SIZE) % (2*GEN_REG_SIZE) != 0) + kernel->curbeSize += GEN_REG_SIZE; + const size_t localIDSize = sizeof(uint32_t) * this->simdWidth; + const PatchInfo lid0(GBE_CURBE_LOCAL_ID_X, 0, kernel->curbeSize); + kernel->curbeSize += localIDSize; + const PatchInfo lid1(GBE_CURBE_LOCAL_ID_Y, 0, kernel->curbeSize); + kernel->curbeSize += localIDSize; + const PatchInfo lid2(GBE_CURBE_LOCAL_ID_Z, 0, kernel->curbeSize); + kernel->curbeSize += localIDSize; + kernel->patches.push_back(lid0); + kernel->patches.push_back(lid1); + kernel->patches.push_back(lid2); + specialRegs.insert(ir::ocl::lid0); + specialRegs.insert(ir::ocl::lid1); + 
specialRegs.insert(ir::ocl::lid2); + // Go over all the instructions and find the special register value we need // to push -#define INSERT_REG(SPECIAL_REG, PATCH) \ - if (reg == ir::ocl::SPECIAL_REG) { \ - if (specialRegs.find(reg) != specialRegs.end()) continue; \ +#define INSERT_REG(SPECIAL_REG, PATCH) \ + if (reg == ir::ocl::SPECIAL_REG) { \ + if (specialRegs.find(reg) != specialRegs.end()) continue; \ const PatchInfo patch(GBE_CURBE_##PATCH, 0, kernel->curbeSize); \ - kernel->patches.push_back(patch); \ - kernel->curbeSize += ptrSize; \ + kernel->patches.push_back(patch); \ + kernel->curbeSize += ptrSize; \ } else - set<ir::Register> specialRegs; // already inserted registers fn.foreachInstruction([&](const ir::Instruction &insn) { - // Special registers are immutable. So only check sources const uint32_t srcNum = insn.getSrcNum(); for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { const ir::Register reg = insn.getSrc(srcID); @@ -108,24 +131,14 @@ namespace gbe specialRegs.insert(reg); } }); - - kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE); - if (this->simdWidth == 16) - if ((kernel->curbeSize + GEN_REG_SIZE) % (2*GEN_REG_SIZE) != 0) - kernel->curbeSize += GEN_REG_SIZE; - - // Local IDs always go at the end of the curbe - const size_t localIDSize = sizeof(uint32_t) * this->simdWidth; - const PatchInfo lid0(GBE_CURBE_LOCAL_ID_X, 0, kernel->curbeSize+0*localIDSize); - const PatchInfo lid1(GBE_CURBE_LOCAL_ID_Y, 0, kernel->curbeSize+1*localIDSize); - const PatchInfo lid2(GBE_CURBE_LOCAL_ID_Z, 0, kernel->curbeSize+2*localIDSize); - kernel->patches.push_back(lid0); - kernel->patches.push_back(lid1); - kernel->patches.push_back(lid2); +#undef INSERT_REG // After this point the vector is immutable. 
Sorting it will make // research faster std::sort(kernel->patches.begin(), kernel->patches.end()); + + // Align it on 128 bytes properly + kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE); } void Context::buildArgList(void) { @@ -171,8 +184,7 @@ namespace gbe bool Context::isScalarReg(const ir::Register &reg) const { GBE_ASSERT(fn.getProfile() == ir::Profile::PROFILE_OCL); - if (fn.getInput(reg) != NULL) - return true; + if (fn.getInput(reg) != NULL) return true; if (reg == ir::ocl::groupid0 || reg == ir::ocl::groupid1 || reg == ir::ocl::groupid2 || diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 24d562d..93ccd32 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -131,7 +131,6 @@ namespace gbe void GenContext::emitUnaryInstruction(const ir::UnaryInstruction &insn) { GBE_ASSERT(insn.getOpcode() == ir::OP_MOV); p->MOV(reg(insn.getDst(0)), reg(insn.getSrc(0))); - //p->MOV(GenReg::retype(reg(insn.getDst(0)), GEN_TYPE_UD), GenReg::retype(reg(insn.getSrc(0)), GEN_TYPE_UD)); } void GenContext::emitBinaryInstruction(const ir::BinaryInstruction &insn) { @@ -243,7 +242,6 @@ namespace gbe const GenReg value = reg(insn.getValue(0)); // XXX remove that later. 
Now we just copy everything to GRFs to make it // contiguous -#if 1 if (this->simdWidth == 8) { p->MOV(GenReg::vec8grf(112, 0), GenReg::retype(address, GEN_TYPE_F)); p->MOV(GenReg::vec8grf(113, 0), GenReg::retype(value, GEN_TYPE_F)); @@ -254,19 +252,6 @@ namespace gbe p->UNTYPED_WRITE(GenReg::vec16grf(112, 0), 0, 1); } else NOT_IMPLEMENTED; -#else - if (this->simdWidth == 8) { - p->MOV(GenReg::ud8grf(112, 0), GenReg::retype(address, GEN_TYPE_UD)); - p->MOV(GenReg::ud8grf(113, 0), GenReg::retype(value, GEN_TYPE_UD)); - p->UNTYPED_WRITE(GenReg::vec8grf(112, 0), 0, 1); - } else if (this->simdWidth == 16) { - p->MOV(GenReg::ud16grf(112, 0), GenReg::retype(address, GEN_TYPE_UD)); - p->MOV(GenReg::ud16grf(114, 0), GenReg::retype(value, GEN_TYPE_UD)); - p->UNTYPED_WRITE(GenReg::vec16grf(112, 0), 0, 1); - } else - NOT_IMPLEMENTED; - -#endif } void GenContext::emitFenceInstruction(const ir::FenceInstruction &insn) {} void GenContext::emitLabelInstruction(const ir::LabelInstruction &insn) {} diff --git a/backend/src/backend/gen_eu.hpp b/backend/src/backend/gen_eu.hpp index 583cb80..2fa8abc 100644 --- a/backend/src/backend/gen_eu.hpp +++ b/backend/src/backend/gen_eu.hpp @@ -141,38 +141,38 @@ namespace gbe static INLINE GenReg vec16(uint32_t file, uint32_t nr, uint32_t subnr) { return GenReg(file, - nr, - subnr, - GEN_TYPE_F, - GEN_VERTICAL_STRIDE_8, - GEN_WIDTH_8, - GEN_HORIZONTAL_STRIDE_1, - GEN_SWIZZLE_XYZW, - WRITEMASK_XYZW); + nr, + subnr, + GEN_TYPE_F, + GEN_VERTICAL_STRIDE_8, + GEN_WIDTH_8, + GEN_HORIZONTAL_STRIDE_1, + GEN_SWIZZLE_XYZW, + WRITEMASK_XYZW); } static INLINE GenReg vec8(uint32_t file, uint32_t nr, uint32_t subnr) { return GenReg(file, - nr, - subnr, - GEN_TYPE_F, - GEN_VERTICAL_STRIDE_8, - GEN_WIDTH_8, - GEN_HORIZONTAL_STRIDE_1, - GEN_SWIZZLE_XYZW, - WRITEMASK_XYZW); + nr, + subnr, + GEN_TYPE_F, + GEN_VERTICAL_STRIDE_8, + GEN_WIDTH_8, + GEN_HORIZONTAL_STRIDE_1, + GEN_SWIZZLE_XYZW, + WRITEMASK_XYZW); } static INLINE GenReg vec4(uint32_t file, uint32_t nr, 
uint32_t subnr) { return GenReg(file, - nr, - subnr, - GEN_TYPE_F, - GEN_VERTICAL_STRIDE_4, - GEN_WIDTH_4, - GEN_HORIZONTAL_STRIDE_1, - GEN_SWIZZLE_XYZW, - WRITEMASK_XYZW); + nr, + subnr, + GEN_TYPE_F, + GEN_VERTICAL_STRIDE_4, + GEN_WIDTH_4, + GEN_HORIZONTAL_STRIDE_1, + GEN_SWIZZLE_XYZW, + WRITEMASK_XYZW); } static INLINE GenReg vec2(uint32_t file, uint32_t nr, uint32_t subnr) { diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h index 56de46a..64020be 100644 --- a/backend/src/backend/program.h +++ b/backend/src/backend/program.h @@ -71,7 +71,8 @@ enum gbe_curbe_type { GBE_CURBE_IMAGE_WIDTH, GBE_CURBE_IMAGE_HEIGHT, GBE_CURBE_IMAGE_DEPTH, - GBE_CURBE_KERNEL_ARGUMENT + GBE_CURBE_KERNEL_ARGUMENT, + GBE_CURBE_BLOCK_IP }; /*! Create a new program from the given source code (zero terminated string) */ diff --git a/backend/src/backend/sim_context.cpp b/backend/src/backend/sim_context.cpp index 04176f5..8b43c1f 100644 --- a/backend/src/backend/sim_context.cpp +++ b/backend/src/backend/sim_context.cpp @@ -55,6 +55,7 @@ namespace gbe usedRegs.insert(insn.getDst(dstID)); }); + // Declare register variables const uint32_t regNum = fn.regNum(); bool lid0 = false, lid1 = false, lid2 = false; // for local id registers for (uint32_t regID = 0; regID < regNum; ++regID) { @@ -276,8 +277,8 @@ namespace gbe #undef LOAD_SPECIAL_REG SVAR(OCL_GCC_SIM_COMPILER, "gcc"); - SVAR(OCL_GCC_SIM_COMPILER_OPTIONS, "-Wall -fPIC -shared -msse -msse2 -msse3 -mssse3 -msse4.1 -g -O3"); SVAR(OCL_ICC_SIM_COMPILER, "icc"); + SVAR(OCL_GCC_SIM_COMPILER_OPTIONS, "-Wall -fPIC -shared -msse -msse2 -msse3 -mssse3 -msse4.1 -g -O3"); SVAR(OCL_ICC_SIM_COMPILER_OPTIONS, "-Wall -ldl -fabi-version=2 -fPIC -shared -O3 -g"); BVAR(OCL_USE_ICC, false); diff --git a/backend/src/backend/sim_context.hpp b/backend/src/backend/sim_context.hpp index 61fcb43..832cd23 100644 --- a/backend/src/backend/sim_context.hpp +++ b/backend/src/backend/sim_context.hpp @@ -54,6 +54,8 @@ namespace gbe void 
emitCurbeLoad(void); /*! Emit the masking code (mask / UIP) */ void emitMaskingCode(void); + /*! Emit the instructions */ + void emitInstructionStream(void); /*! Implements base class */ virtual Kernel *allocateKernel(void); std::ofstream o; //!< Where to output the c++ string -- 2.7.4