From 5af0957253120bfca595c88f4f9038084d0c2b5c Mon Sep 17 00:00:00 2001 From: Zhigang Gong Date: Wed, 15 Jan 2014 19:50:55 +0800 Subject: [PATCH] GBE/Sampler: Simplify the sampler handling. Move the sampler allocation to the Gen stage. Then we don't need to maintain a fake key register, which may also confuse the later register allocation phase. Signed-off-by: Zhigang Gong Reviewed-by: "Yang, Rong R" --- backend/src/backend/context.cpp | 6 +-- backend/src/backend/gen_insn_selection.cpp | 17 ++++----- backend/src/backend/gen_reg_allocation.cpp | 31 ++++++++++++---- backend/src/ir/instruction.cpp | 59 ++++++++++++++++++------------ backend/src/ir/instruction.hpp | 11 ++++-- backend/src/ir/sampler.cpp | 53 ++++----------------------- backend/src/ir/sampler.hpp | 10 ++--- backend/src/llvm/llvm_gen_backend.cpp | 26 ++++++------- 8 files changed, 100 insertions(+), 113 deletions(-) diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp index 2543cae..5a01132 100644 --- a/backend/src/backend/context.cpp +++ b/backend/src/backend/context.cpp @@ -489,9 +489,9 @@ namespace gbe continue; } else if (insn.getOpcode() == ir::OP_GET_SAMPLER_INFO) { /* change the src to sampler information register. 
*/ - GBE_ASSERT(insn.getSrc(1) == ir::ocl::samplerinfo); - if (curbeRegs.find(insn.getSrc(1)) == curbeRegs.end()) - insertCurbeReg(insn.getSrc(1), this->newCurbeEntry(GBE_CURBE_SAMPLER_INFO, 0, 32)); + GBE_ASSERT(insn.getSrc(0) == ir::ocl::samplerinfo); + if (curbeRegs.find(insn.getSrc(0)) == curbeRegs.end()) + insertCurbeReg(insn.getSrc(0), this->newCurbeEntry(GBE_CURBE_SAMPLER_INFO, 0, 32)); continue; } if (fn.isSpecialReg(reg) == false) continue; diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 1769f17..0118ae4 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -2933,10 +2933,10 @@ namespace gbe { using namespace ir; GenRegister msgPayloads[4]; - GenRegister dst[insn.getDstNum()], src[insn.getSrcNum() - 2]; + GenRegister dst[insn.getDstNum()], src[insn.getSrcNum() - 1]; uint32_t srcNum = insn.getSrcNum(); uint32_t samplerOffset = 0; - if (srcNum == 6) { + if (srcNum == 5) { /* We have the clamp border workaround. 
*/ samplerOffset = insn.getSrc(srcNum - 1).value() * 8; srcNum--; @@ -2948,15 +2948,14 @@ namespace gbe for (uint32_t valueID = 0; valueID < insn.getDstNum(); ++valueID) dst[valueID] = sel.selReg(insn.getDst(valueID), insn.getDstType()); - for (uint32_t valueID = 0; valueID < srcNum - 2; ++valueID) - src[valueID] = sel.selReg(insn.getSrc(valueID + 2), insn.getSrcType()); + for (uint32_t valueID = 0; valueID < srcNum - 1; ++valueID) + src[valueID] = sel.selReg(insn.getSrc(valueID + 1), insn.getSrcType()); uint32_t bti = sel.ctx.getFunction().getImageSet()->getIdx - (insn.getSrc(SampleInstruction::SURFACE_BTI)); - uint32_t sampler = sel.ctx.getFunction().getSamplerSet()->getIdx - (insn.getSrc(SampleInstruction::SAMPLER_BTI)) + samplerOffset; + (insn.getSrc(0)); + uint32_t sampler = insn.getSamplerIndex() + samplerOffset; - sel.SAMPLE(dst, insn.getDstNum(), src, srcNum - 2, msgPayloads, 4, bti, sampler); + sel.SAMPLE(dst, insn.getDstNum(), src, srcNum - 1, msgPayloads, 4, bti, sampler); return true; } DECL_CTOR(SampleInstruction, 1, 1); @@ -3017,7 +3016,7 @@ namespace gbe using namespace ir; GenRegister dst, src; dst = sel.selReg(insn.getDst(0), TYPE_U16); - src = GenRegister::offset(GenRegister::uw1grf(insn.getSrc(1)), 0, sel.ctx.getFunction().getSamplerSet()->getIdx(insn.getSrc(0)) * 2); + src = GenRegister::offset(GenRegister::uw1grf(insn.getSrc(0)), 0, insn.getSamplerIndex() * 2); src.subphysical = 1; sel.MOV(dst, src); return true; diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp index 2e2be04..7365e02 100644 --- a/backend/src/backend/gen_reg_allocation.cpp +++ b/backend/src/backend/gen_reg_allocation.cpp @@ -30,6 +30,8 @@ #include "sys/exception.hpp" #include #include +#include +#include #define RESERVED_REG_NUM_FOR_SPILL 6 @@ -639,8 +641,6 @@ namespace gbe this->intervals[ocl::emask].maxID = INT_MAX; this->intervals[ocl::notemask].minID = 0; this->intervals[ocl::notemask].maxID = INT_MAX; -// 
this->intervals[ocl::barriermask].minID = 0; -// this->intervals[ocl::barriermask].maxID = INT_MAX; this->intervals[ocl::retVal].minID = INT_MAX; this->intervals[ocl::retVal].maxID = -INT_MAX; @@ -672,13 +672,28 @@ namespace gbe } INLINE void GenRegAllocator::Opaque::outputAllocation(void) { - std::cout << "## register allocation ##" << std::endl; + using namespace std; + cout << "## register allocation ##" << endl; for(auto &i : RA) { - int vReg = (int)i.first; - int offst = (int)i.second / sizeof(float); - int reg = offst / 8; - int subreg = offst % 8; - std::cout << "%" << vReg << " g" << reg << "." << subreg << "D" << std::endl; + ir::Register vReg = (ir::Register)i.first; + int offst = (int)i.second;// / sizeof(float); + ir::RegisterData regData = ctx.sel->getRegisterData(vReg); + int reg = offst / 32; + int subreg = offst % 32; + ir::RegisterFamily family = regData.family; + int registerSize; + if (family == ir::FAMILY_BOOL) + registerSize = 2; + else { + registerSize = ir::getFamilySize(regData.family); + if (!ctx.isScalarReg(vReg)) + registerSize *= ctx.getSimdWidth(); + } + cout << "%" << setw(-8) << vReg << "\tg" << setw(-3) << reg << "." 
<< setw(-2) << subreg << "B" + << "\t" << setw(3) << registerSize + << "\t[" << setw(8) << this->intervals[(uint)vReg].minID + << " -> " << setw(8) << this->intervals[(uint)vReg].maxID + << "]" << endl; } std::set::iterator is; std::cout << "## spilled registers:" << std::endl; diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index b898820..182b95e 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -491,12 +491,13 @@ namespace ir { public TupleDstPolicy { public: - SampleInstruction(Tuple dstTuple, Tuple srcTuple, Type dstType, Type srcType) { + SampleInstruction(Tuple dstTuple, Tuple srcTuple, bool dstIsFloat, bool srcIsFloat, uint8_t sampler) { this->opcode = OP_SAMPLE; this->dst = dstTuple; this->src = srcTuple; - this->dstType = dstType; - this->srcType = srcType; + this->dstIsFloat = dstIsFloat; + this->srcIsFloat = srcIsFloat; + this->samplerIdx = sampler; } INLINE bool wellFormed(const Function &fn, std::string &why) const; INLINE void out(std::ostream &out, const Function &fn) const { @@ -504,24 +505,27 @@ namespace ir { out << "." << this->getDstType() << "." << this->getSrcType() << " surface id %" << this->getSrc(fn, 0) - << " sampler %" << this->getSrc(fn, 1) - << " coord u %" << this->getSrc(fn, 2) - << " coord v %" << this->getSrc(fn, 3) - << " coord w %" << this->getSrc(fn, 4) + << " coord u %" << this->getSrc(fn, 1) + << " coord v %" << this->getSrc(fn, 2) + << " coord w %" << this->getSrc(fn, 3) << " %" << this->getDst(fn, 0) << " %" << this->getDst(fn, 1) << " %" << this->getDst(fn, 2) - << " %" << this->getDst(fn, 3); + << " %" << this->getDst(fn, 3) + << " sampler idx " << (int)this->samplerIdx; } Tuple src; Tuple dst; - Type srcType; - Type dstType; - INLINE Type getSrcType(void) const { return this->srcType; } - INLINE Type getDstType(void) const { return this->dstType; } + INLINE Type getSrcType(void) const { return this->srcIsFloat ? 
TYPE_FLOAT : TYPE_S32; } + INLINE Type getDstType(void) const { return this->dstIsFloat ? TYPE_FLOAT : TYPE_U32; } + INLINE const uint8_t getSamplerIndex(void) const { return this->samplerIdx; } - static const uint32_t srcNum = 6; + uint16_t srcIsFloat:1; + uint16_t dstIsFloat:1; + uint16_t samplerIdx:4; + uint16_t imageIdx:8; // not used yet. + static const uint32_t srcNum = 5; static const uint32_t dstNum = 4; }; @@ -565,29 +569,34 @@ namespace ir { class ALIGNED_INSTRUCTION GetSamplerInfoInstruction : public BasePolicy, - public NSrcPolicy, + public NSrcPolicy, public NDstPolicy { public: GetSamplerInfoInstruction( Register dst, - Register src, - Register samplerInfo) + Register samplerInfo, + uint8_t samplerIdx) { this->opcode = OP_GET_SAMPLER_INFO; this->dst[0] = dst; - this->src[0] = src; - this->src[1] = samplerInfo; + this->src[0] = samplerInfo; + this->samplerIdx = samplerIdx; } INLINE bool wellFormed(const Function &fn, std::string &why) const; INLINE void out(std::ostream &out, const Function &fn) const { this->outOpcode(out); out << " %" << this->getDst(fn, 0) - << " sampler id %" << this->getSrc(fn, 0); + << " %" << this->getSrc(fn, 0) + << " sampler idx " << (int)this->samplerIdx; + } + INLINE const uint8_t getSamplerIndex() const { + return this->samplerIdx; } - Register src[2]; //!< Surface to get info + Register src[1]; //!< sampler to get info Register dst[1]; //!< return value + uint8_t samplerIdx; //!< sampler slot index. 
static const uint32_t dstNum = 1; }; @@ -1455,9 +1464,11 @@ DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex()) DECL_MEM_FN(SyncInstruction, uint32_t, getParameters(void), getParameters()) DECL_MEM_FN(SampleInstruction, Type, getSrcType(void), getSrcType()) DECL_MEM_FN(SampleInstruction, Type, getDstType(void), getDstType()) +DECL_MEM_FN(SampleInstruction, const uint8_t, getSamplerIndex(void), getSamplerIndex()) DECL_MEM_FN(TypedWriteInstruction, Type, getSrcType(void), getSrcType()) DECL_MEM_FN(TypedWriteInstruction, Type, getCoordType(void), getCoordType()) DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType()) +DECL_MEM_FN(GetSamplerInfoInstruction, const uint8_t, getSamplerIndex(void), getSamplerIndex()) #undef DECL_MEM_FN @@ -1635,8 +1646,8 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType()) } // SAMPLE - Instruction SAMPLE(Tuple dst, Tuple src, Type dstType, Type srcType) { - return internal::SampleInstruction(dst, src, dstType, srcType).convert(); + Instruction SAMPLE(Tuple dst, Tuple src, bool dstIsFloat, bool srcIsFloat, uint8_t sampler) { + return internal::SampleInstruction(dst, src, dstIsFloat, srcIsFloat, sampler).convert(); } Instruction TYPED_WRITE(Tuple src, Type srcType, Type coordType) { @@ -1647,8 +1658,8 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType()) return internal::GetImageInfoInstruction(infoType, dst, src, infoReg).convert(); } - Instruction GET_SAMPLER_INFO(Register dst, Register src, Register samplerInfo) { - return internal::GetSamplerInfoInstruction(dst, src, samplerInfo).convert(); + Instruction GET_SAMPLER_INFO(Register dst, Register samplerInfo, uint8_t samplerIdx) { + return internal::GetSamplerInfoInstruction(dst, samplerInfo, samplerIdx).convert(); } std::ostream &operator<< (std::ostream &out, const Instruction &insn) { diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 
a3255b1..ce61106 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -363,10 +363,12 @@ namespace ir { SURFACE_BTI = 0, SAMPLER_BTI = 1 }; - /*! Return true if the given instruction is an instance of this class */ - static bool isClassOf(const Instruction &insn); + + const uint8_t getSamplerIndex(void) const; Type getSrcType(void) const; Type getDstType(void) const; + /*! Return true if the given instruction is an instance of this class */ + static bool isClassOf(const Instruction &insn); }; typedef union { @@ -415,6 +417,7 @@ namespace ir { class GetSamplerInfoInstruction : public Instruction { public: + const uint8_t getSamplerIndex(void) const; /*! Return true if the given instruction is an instance of this class */ static bool isClassOf(const Instruction &insn); }; @@ -665,11 +668,11 @@ namespace ir { /*! typed write */ Instruction TYPED_WRITE(Tuple src, Type srcType, Type coordType); /*! sample textures */ - Instruction SAMPLE(Tuple dst, Tuple src, Type dstType, Type srcType); + Instruction SAMPLE(Tuple dst, Tuple src, bool dstIsFloat, bool srcIsFloat, uint8_t sampler); /*! get image information , such as width/height/depth/... */ Instruction GET_IMAGE_INFO(int infoType, Register dst, Register src, Register infoReg); /*! get sampler information */ - Instruction GET_SAMPLER_INFO(Register dst, Register src, Register samplerInfo); + Instruction GET_SAMPLER_INFO(Register dst, Register samplerInfo, uint8_t index); /*! 
label labelIndex */ Instruction LABEL(LabelIndex labelIndex); diff --git a/backend/src/ir/sampler.cpp b/backend/src/ir/sampler.cpp index cff1012..462fab7 100644 --- a/backend/src/ir/sampler.cpp +++ b/backend/src/ir/sampler.cpp @@ -27,34 +27,24 @@ namespace gbe { namespace ir { - const uint32_t SamplerSet::getIdx(const Register reg) const - { - auto it = regMap.find(reg); - GBE_ASSERT(it != regMap.end()); - return it->second.slot; - } - - void SamplerSet::appendReg(const Register reg, uint32_t key, Context *ctx) { + uint8_t SamplerSet::appendReg(uint32_t key, Context *ctx) { struct SamplerRegSlot samplerSlot; - samplerSlot.reg = reg; samplerSlot.slot = samplerMap.size(); samplerMap.insert(std::make_pair(key, samplerSlot)); - regMap.insert(std::make_pair(samplerSlot.reg, samplerSlot)); + return samplerSlot.slot; } - Register SamplerSet::append(uint32_t samplerValue, Context *ctx) + uint8_t SamplerSet::append(uint32_t samplerValue, Context *ctx) { auto it = samplerMap.find(samplerValue); if (it != samplerMap.end()) - return it->second.reg; + return it->second.slot; // This register is just used as a key. 
- Register reg = ctx->reg(FAMILY_DWORD); - appendReg(reg, samplerValue, ctx); - return reg; + return appendReg(samplerValue, ctx); } #define SAMPLER_ID(id) ((id << __CLK_SAMPLER_ARG_BASE) | __CLK_SAMPLER_ARG_KEY_BIT) - void SamplerSet::append(Register samplerReg, Context *ctx) + uint8_t SamplerSet::append(Register samplerReg, Context *ctx) { ir::FunctionArgument *arg = ctx->getFunction().getArg(samplerReg); GBE_ASSERT(arg != NULL); @@ -68,13 +58,11 @@ namespace ir { auto it = samplerMap.find(SAMPLER_ID(id)); if (it != samplerMap.end()) { - GBE_ASSERT(it->second.reg == samplerReg); - return; + return it->second.slot; } - appendReg(samplerReg, SAMPLER_ID(id), ctx); + return appendReg(SAMPLER_ID(id), ctx); } - #define OUT_UPDATE_SZ(elt) SERIALIZE_OUT(elt, outs, ret_size) #define IN_UPDATE_SZ(elt) DESERIALIZE_IN(elt, ins, total_size) @@ -91,13 +79,6 @@ namespace ir { OUT_UPDATE_SZ(iter.second.slot); } - OUT_UPDATE_SZ(regMap.size()); - for (auto iter : regMap) { - OUT_UPDATE_SZ(iter.first); - OUT_UPDATE_SZ(iter.second.reg); - OUT_UPDATE_SZ(iter.second.slot); - } - OUT_UPDATE_SZ(magic_end); OUT_UPDATE_SZ(ret_size); @@ -124,17 +105,6 @@ namespace ir { samplerMap.insert(std::make_pair(key, reg_slot)); } - IN_UPDATE_SZ(sampler_map_sz); - for (size_t i = 0; i < sampler_map_sz; i++) { - ir::Register key; - ir::SamplerRegSlot reg_slot; - - IN_UPDATE_SZ(key); - IN_UPDATE_SZ(reg_slot.reg); - IN_UPDATE_SZ(reg_slot.slot); - regMap.insert(std::make_pair(key, reg_slot)); - } - IN_UPDATE_SZ(magic); if (magic != magic_end) return 0; @@ -162,13 +132,6 @@ namespace ir { << iter.second.reg << ", " << iter.second.slot << "]\n"; } - outs << spaces_nl << " SamplerSet Map: [reg, sampler_reg, sampler_slot]\n"; - outs << spaces_nl << " regMap size: " << regMap.size() << "\n"; - for (auto iter : regMap) { - outs << spaces_nl << " [" << iter.first << ", " - << iter.second.reg << ", " << iter.second.slot << "]\n"; - } - outs << spaces << "------------- End SamplerSet -------------" << "\n"; } 
diff --git a/backend/src/ir/sampler.hpp b/backend/src/ir/sampler.hpp index 3c72e3e..e6706b9 100644 --- a/backend/src/ir/sampler.hpp +++ b/backend/src/ir/sampler.hpp @@ -47,11 +47,9 @@ namespace ir { /*! Append the specified sampler and return the allocated offset. * If the speficied sampler is exist, only return the previous offset and * don't append it again. Return -1, if failed.*/ - Register append(uint32_t clkSamplerValue, Context *ctx); + uint8_t append(uint32_t clkSamplerValue, Context *ctx); /*! Append a sampler defined in kernel args. */ - void append(Register samplerArg, Context *ctx); - /*! Get the sampler idx (actual location) */ - const uint32_t getIdx(const Register reg) const; + uint8_t append(Register samplerArg, Context *ctx); size_t getDataSize(void) { return samplerMap.size(); } size_t getDataSize(void) const { return samplerMap.size(); } void getData(uint32_t *samplers) const { @@ -60,7 +58,6 @@ namespace ir { } void operator = (const SamplerSet& other) { - regMap.insert(other.regMap.begin(), other.regMap.end()); samplerMap.insert(other.samplerMap.begin(), other.samplerMap.end()); } @@ -90,9 +87,8 @@ namespace ir { virtual void printStatus(int indent, std::ostream& outs); private: - void appendReg(const Register reg, uint32_t key, Context *ctx); + uint8_t appendReg(uint32_t key, Context *ctx); map samplerMap; - map regMap; GBE_CLASS(SamplerSet); }; } /* namespace ir */ diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 002a161..39df7b3 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -555,7 +555,7 @@ namespace gbe // Emit unary instructions from gen native function void emitAtomicInst(CallInst &I, CallSite &CS, ir::AtomicOps opcode); - ir::Register appendSampler(CallSite::arg_iterator AI); + uint8_t appendSampler(CallSite::arg_iterator AI); // These instructions are not supported at all void visitVAArgInst(VAArgInst &I) {NOT_SUPPORTED;} @@ -2221,21 
+2221,22 @@ namespace gbe /* append a new sampler. should be called before any reference to * a sampler_t value. */ - ir::Register GenWriter::appendSampler(CallSite::arg_iterator AI) { + uint8_t GenWriter::appendSampler(CallSite::arg_iterator AI) { Constant *CPV = dyn_cast(*AI); - ir::Register sampler; + uint8_t index; if (CPV != NULL) { // This is not a kernel argument sampler, we need to append it to sampler set, // and allocate a sampler slot for it. auto x = processConstant(CPV, InsertExtractFunctor(ctx)); GBE_ASSERTM(x.type == ir::TYPE_U16 || x.type == ir::TYPE_S16, "Invalid sampler type"); - sampler = ctx.getFunction().getSamplerSet()->append(x.data.u32, &ctx); + + index = ctx.getFunction().getSamplerSet()->append(x.data.u32, &ctx); } else { - sampler = this->getRegister(*AI); - ctx.getFunction().getSamplerSet()->append(sampler, &ctx); + const ir::Register samplerReg = this->getRegister(*AI); + index = ctx.getFunction().getSamplerSet()->append(samplerReg, &ctx); } - return sampler; + return index; } void GenWriter::emitCallInst(CallInst &I) { @@ -2368,9 +2369,9 @@ namespace gbe case GEN_OCL_GET_SAMPLER_INFO: { GBE_ASSERT(AI != AE); - const ir::Register sampler = this->appendSampler(AI); ++AI; + const uint8_t index = this->appendSampler(AI); ++AI; const ir::Register reg = this->getRegister(&I, 0); - ctx.GET_SAMPLER_INFO(reg, sampler, ir::ocl::samplerinfo); + ctx.GET_SAMPLER_INFO(reg, ir::ocl::samplerinfo, index); break; } case GEN_OCL_READ_IMAGE0: @@ -2388,7 +2389,7 @@ namespace gbe { GBE_ASSERT(AI != AE); const ir::Register surface_id = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); - const ir::Register sampler = this->appendSampler(AI); + const uint8_t sampler = this->appendSampler(AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register ucoord = this->getRegister(*AI); ++AI; @@ -2406,7 +2407,6 @@ namespace gbe dstTupleData.push_back(reg); } srcTupleData.push_back(surface_id); - srcTupleData.push_back(sampler); srcTupleData.push_back(ucoord); 
srcTupleData.push_back(vcoord); srcTupleData.push_back(wcoord); @@ -2422,7 +2422,7 @@ namespace gbe #endif srcTupleData.push_back(offsetReg); const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], elemNum); - const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 6); + const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 5); ir::Type srcType = ir::TYPE_S32, dstType = ir::TYPE_U32; @@ -2454,7 +2454,7 @@ namespace gbe GBE_ASSERT(0); // never been here. } - ctx.SAMPLE(dstTuple, srcTuple, dstType, srcType); + ctx.SAMPLE(dstTuple, srcTuple, dstType == ir::TYPE_FLOAT, srcType == ir::TYPE_FLOAT, sampler); break; } case GEN_OCL_WRITE_IMAGE0: -- 2.7.4