From ef33766f8c3e3de94d723c5c1ceef5405c1ae487 Mon Sep 17 00:00:00 2001 From: Zhigang Gong Date: Thu, 24 Apr 2014 15:24:07 +0800 Subject: [PATCH] GBE: Avoid unnecessary dag/liveness computing at backend. We don't need to compute dag/liveness at the backend when we switch to a new code gen strategy. For the unit test case, this patch could save 15% of the overall execution time. For the luxmark with STRICT conformance mode, it saves about 40% of the build time. v3: fix some minor bugs. Signed-off-by: Zhigang Gong Reviewed-by: "Yang, Rong R" --- backend/src/backend/context.cpp | 28 ++++++++++++++++++++-------- backend/src/backend/context.hpp | 2 ++ backend/src/backend/gen_context.cpp | 26 +++++++++++++++++++------- backend/src/backend/gen_context.hpp | 5 +++-- backend/src/backend/gen_insn_selection.cpp | 10 ++++++++-- backend/src/backend/gen_program.cpp | 17 +++++++++++++---- backend/src/backend/gen_reg_allocation.cpp | 5 +++-- 7 files changed, 68 insertions(+), 25 deletions(-) diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp index dc27d83..6a0bca2 100644 --- a/backend/src/backend/context.cpp +++ b/backend/src/backend/context.cpp @@ -335,12 +335,8 @@ namespace gbe this->liveness = GBE_NEW(ir::Liveness, const_cast(fn)); this->dag = GBE_NEW(ir::FunctionDAG, *this->liveness); // r0 (GEN_REG_SIZE) is always set by the HW and used at the end by EOT - this->registerAllocator = GBE_NEW(RegisterAllocator, GEN_REG_SIZE, 4*KB - GEN_REG_SIZE); - this->scratchAllocator = GBE_NEW(ScratchAllocator, 12*KB); - if (fn.getSimdWidth() == 0 || OCL_SIMD_WIDTH != 15) - this->simdWidth = nextHighestPowerOf2(OCL_SIMD_WIDTH); - else - this->simdWidth = fn.getSimdWidth(); + this->registerAllocator = NULL; //GBE_NEW(RegisterAllocator, GEN_REG_SIZE, 4*KB - GEN_REG_SIZE); + this->scratchAllocator = NULL; //GBE_NEW(ScratchAllocator, 12*KB); } Context::~Context(void) { @@ -350,12 +346,28 @@ namespace gbe GBE_SAFE_DELETE(this->liveness); } + void 
Context::startNewCG(uint32_t simdWidth) { + if (simdWidth == 0 || OCL_SIMD_WIDTH != 15) + this->simdWidth = nextHighestPowerOf2(OCL_SIMD_WIDTH); + else + this->simdWidth = simdWidth; + GBE_SAFE_DELETE(this->registerAllocator); + GBE_SAFE_DELETE(this->scratchAllocator); + GBE_ASSERT(dag != NULL && liveness != NULL); + this->registerAllocator = GBE_NEW(RegisterAllocator, GEN_REG_SIZE, 4*KB - GEN_REG_SIZE); + this->scratchAllocator = GBE_NEW(ScratchAllocator, 12*KB); + this->curbeRegs.clear(); + this->JIPs.clear(); + } + Kernel *Context::compileKernel(void) { this->kernel = this->allocateKernel(); this->kernel->simdWidth = this->simdWidth; this->buildArgList(); - this->buildUsedLabels(); - this->buildJIPs(); + if (usedLabels.size() == 0) + this->buildUsedLabels(); + if (JIPs.size() == 0) + this->buildJIPs(); this->buildStack(); this->handleSLM(); if (this->emitCode() == false) { diff --git a/backend/src/backend/context.hpp b/backend/src/backend/context.hpp index 26167a0..d4dcfca 100644 --- a/backend/src/backend/context.hpp +++ b/backend/src/backend/context.hpp @@ -56,6 +56,8 @@ namespace gbe Context(const ir::Unit &unit, const std::string &name); /*! Release everything needed */ virtual ~Context(void); + /*! start new code generation with specific simd width. */ + void startNewCG(uint32_t simdWidth); /*! Compile the code */ Kernel *compileKernel(void); /*! 
Tells if the labels is used */ diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 34e3e61..4da47f8 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -46,15 +46,12 @@ namespace gbe GenContext::GenContext(const ir::Unit &unit, const std::string &name, uint32_t deviceID, - uint32_t reservedSpillRegs, - bool limitRegisterPressure, bool relaxMath) : - Context(unit, name), deviceID(deviceID), reservedSpillRegs(reservedSpillRegs), - limitRegisterPressure(limitRegisterPressure), relaxMath(relaxMath) + Context(unit, name), deviceID(deviceID), relaxMath(relaxMath) { - this->p = GBE_NEW(GenEncoder, simdWidth, 7, deviceID); // XXX handle more than Gen7 - this->sel = GBE_NEW(Selection, *this); - this->ra = GBE_NEW(GenRegAllocator, *this); + this->p = NULL; + this->sel = NULL; + this->ra = NULL; } GenContext::~GenContext(void) { @@ -63,6 +60,21 @@ namespace gbe GBE_DELETE(this->p); } + void GenContext::startNewCG(uint32_t simdWidth, uint32_t reservedSpillRegs, bool limitRegisterPressure) { + this->limitRegisterPressure = limitRegisterPressure; + this->reservedSpillRegs = reservedSpillRegs; + Context::startNewCG(simdWidth); + GBE_SAFE_DELETE(ra); + GBE_SAFE_DELETE(sel); + GBE_SAFE_DELETE(p); + this->p = GBE_NEW(GenEncoder, this->simdWidth, 7, deviceID); // XXX handle more than Gen7 + this->sel = GBE_NEW(Selection, *this); + this->ra = GBE_NEW(GenRegAllocator, *this); + this->branchPos2.clear(); + this->branchPos3.clear(); + this->labelPos.clear(); + } + void GenContext::emitInstructionStream(void) { // Emit Gen ISA for (auto &block : *sel->blockList) diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp index 12434f5..14ea719 100644 --- a/backend/src/backend/gen_context.hpp +++ b/backend/src/backend/gen_context.hpp @@ -52,10 +52,11 @@ namespace gbe /*! Create a new context. 
name is the name of the function we want to * compile */ - GenContext(const ir::Unit &unit, const std::string &name, uint32_t deviceID, uint32_t reservedSpillRegs = 0, - bool limitRegisterPressure = false, bool relaxMath = false); + GenContext(const ir::Unit &unit, const std::string &name, uint32_t deviceID, bool relaxMath = false); /*! Release everything needed */ ~GenContext(void); + /*! Start new code generation with specific parameters */ + void startNewCG(uint32_t simdWidth, uint32_t reservedSpillRegs, bool limitRegisterPressure); /*! Target device ID*/ uint32_t deviceID; /*! Implements base class */ diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 32086d3..c05a97b 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -740,7 +740,10 @@ namespace gbe } if (poolOffset > ctx.reservedSpillRegs) { - std::cerr << "Instruction (#" << (uint32_t)insn.opcode << ") src too large pooloffset " << (uint32_t)poolOffset << std::endl; + if (GBE_DEBUG) + std::cerr << "Instruction (#" << (uint32_t)insn.opcode + << ") src too large pooloffset " + << (uint32_t)poolOffset << std::endl; return false; } while(!regSet.empty()) { @@ -798,7 +801,10 @@ namespace gbe } if (poolOffset > ctx.reservedSpillRegs){ - std::cerr << "Instruction (#" << (uint32_t)insn.opcode << ") dst too large pooloffset " << (uint32_t)poolOffset << std::endl; + if (GBE_DEBUG) + std::cerr << "Instruction (#" << (uint32_t)insn.opcode + << ") dst too large pooloffset " + << (uint32_t)poolOffset << std::endl; return false; } while(!regSet.empty()) { diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 4dca79d..83ec5a6 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -83,6 +83,7 @@ namespace gbe { {16, 10, false}, {8, 0, false}, {8, 8, false}, + {8, 16, false}, }; Kernel *GenProgram::compileKernel(const ir::Unit &unit, const 
std::string &name, bool relaxMath) { @@ -91,11 +92,20 @@ namespace gbe { // when the function already provides the simd width we need to use (i.e. // non zero) const ir::Function *fn = unit.getFunction(name); - const uint32_t codeGenNum = fn->getSimdWidth() != 0 ? 2 : 4; - uint32_t codeGen = fn->getSimdWidth() == 8 ? 2 : 0; + uint32_t codeGenNum = sizeof(codeGenStrategy) / sizeof(codeGenStrategy[0]); + uint32_t codeGen = 0; + if (fn->getSimdWidth() == 8) { + codeGen = 2; + } else if (fn->getSimdWidth() == 16) { + codeGenNum = 2; + } else if (fn->getSimdWidth() == 0) { + codeGen = 0; + } else + GBE_ASSERT(0); Kernel *kernel = NULL; // Stop when compilation is successful + GenContext *ctx = GBE_NEW(GenContext, unit, name, deviceID, relaxMath); for (; codeGen < codeGenNum; ++codeGen) { const uint32_t simdWidth = codeGenStrategy[codeGen].simdWidth; const bool limitRegisterPressure = codeGenStrategy[codeGen].limitRegisterPressure; @@ -103,12 +113,11 @@ namespace gbe { // Force the SIMD width now and try to compile unit.getFunction(name)->setSimdWidth(simdWidth); - Context *ctx = GBE_NEW(GenContext, unit, name, deviceID, reservedSpillRegs, limitRegisterPressure, relaxMath); + ctx->startNewCG(simdWidth, reservedSpillRegs, limitRegisterPressure); kernel = ctx->compileKernel(); if (kernel != NULL) { break; } - GBE_DELETE(ctx); fn->getImageSet()->clearInfo(); } diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp index 32cd643..54b7cac 100644 --- a/backend/src/backend/gen_reg_allocation.cpp +++ b/backend/src/backend/gen_reg_allocation.cpp @@ -627,7 +627,6 @@ namespace gbe const uint32_t grfOffset = allocateReg(interval, size, alignment); if(grfOffset == 0) { GBE_ASSERT(!(reservedReg && family != ir::FAMILY_DWORD)); - GBE_ASSERT(ctx.reservedSpillRegs == 0 || vector->regNum < ctx.reservedSpillRegs); for(int i = vector->regNum-1; i >= 0; i--) { if (!spillReg(vector->reg[i].reg())) return false; @@ -662,7 +661,8 @@ namespace gbe 
allocateScratchForSpilled(); bool success = selection.spillRegs(spilledRegs, reservedReg); if (!success) { - std::cerr << "Fail to spill registers." << std::endl; + if (GBE_DEBUG) + std::cerr << "Fail to spill registers." << std::endl; return false; } } @@ -775,6 +775,7 @@ namespace gbe // from the RA map. bool success = expireReg(interval.reg); GBE_ASSERT(success); + success = success; RA.erase(interval.reg); } spilledRegs.insert(std::make_pair(interval.reg, spillTag)); -- 2.7.4