#include "backend/gen_defs.hpp"
#include "backend/gen_encoder.hpp"
#include "backend/gen_insn_selection.hpp"
+#include "backend/gen_insn_scheduling.hpp"
#include "backend/gen_reg_allocation.hpp"
#include "backend/gen/gen_mesa_disasm.h"
#include "ir/function.hpp"
///////////////////////////////////////////////////////////////////////////
// GenContext implementation
///////////////////////////////////////////////////////////////////////////
- GenContext::GenContext(const ir::Unit &unit, const std::string &name) :
- Context(unit, name)
+ GenContext::GenContext(const ir::Unit &unit,
+ const std::string &name,
+ bool limitRegisterPressure) :
+ Context(unit, name), limitRegisterPressure(limitRegisterPressure)
{
this->p = GBE_NEW(GenEncoder, simdWidth, 7); // XXX handle more than Gen7
this->sel = GBE_NEW(Selection, *this);
bool GenContext::emitCode(void) {
GenKernel *genKernel = static_cast<GenKernel*>(this->kernel);
sel->select();
+ // First scheduling pass on virtual registers (LIFO order, see
+ // SchedulePolicy): tends to shorten live ranges before allocation
+ schedulePreRegAllocation(*this, *this->sel);
if (UNLIKELY(ra->allocate(*this->sel) == false))
return false;
+ // Registers are physical now: reschedule (FIFO order) to hide latencies
+ schedulePostRegAllocation(*this, *this->sel);
this->emitStackPointer();
this->emitInstructionStream();
this->patchBranches();
/*! Create a new context. name is the name of the function we want to
* compile
*/
- GenContext(const ir::Unit &unit, const std::string &name);
+ GenContext(const ir::Unit &unit, const std::string &name, bool limitRegisterPressure = false);
/*! Release everything needed */
~GenContext(void);
/*! Implements base class */
Selection *sel;
/*! Perform the register allocation */
GenRegAllocator *ra;
+ /*! Indicate if we need to tackle a register pressure issue when
+ * regenerating the code
+ */
+ bool limitRegisterPressure;
};
} /* namespace gbe */
*/
#include "backend/gen_insn_selection.hpp"
+#include "backend/gen_reg_allocation.hpp"
#include "sys/cvar.hpp"
#include "sys/intrusive_list.hpp"
MAX_MEM_SYSTEM
};
+ /*! Do we schedule before or after the register allocation? */
+ enum SchedulePolicy {
+ PRE_ALLOC = 0, // LIFO scheduling (tends to limit register pressure)
+ POST_ALLOC // FIFO scheduling (limits latency problems)
+ };
+
/*! Helper structure to handle dependencies while scheduling. Takes into
* account virtual and physical registers and memory sub-systems
*/
/*! Stores the nodes per instruction */
vector<ScheduleDAGNode*> insnNodes;
/*! Number of virtual register in the selection */
- uint32_t virtualNum;
+ uint32_t grfNum;
};
/*! Perform the instruction scheduling */
struct SelectionScheduler : public NonCopyable
{
/*! Init the book keeping structures */
- SelectionScheduler(GenContext &ctx, Selection &selection);
+ SelectionScheduler(GenContext &ctx, Selection &selection, SchedulePolicy policy);
/*! Make all lists empty */
void clearLists(void);
/*! Return the number of instructions to schedule in the DAG */
int32_t buildDAG(SelectionBlock &bb);
/*! Schedule the DAG */
void scheduleDAG(SelectionBlock &bb, int32_t insnNum);
+ /*! To limit register pressure or limit insn latency problems */
+ SchedulePolicy policy;
/*! Make ScheduleListNode allocation faster */
DECL_POOL(ScheduleListNode, listPool);
/*! Make ScheduleDAGNode allocation faster */
DependencyTracker::DependencyTracker(const Selection &selection, SelectionScheduler &scheduler) :
scheduler(scheduler)
{
- this->virtualNum = selection.getRegNum();
- nodes.resize(virtualNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_MEM_SYSTEM);
+ // Before allocation we track dependencies on virtual registers: one
+ // slot per virtual register in the selection
+ if (scheduler.policy == PRE_ALLOC) {
+ this->grfNum = selection.getRegNum();
+ nodes.resize(grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_MEM_SYSTEM);
+ } else {
+ // After allocation we track physical GRFs. SIMD8 tracks single GRFs
+ // (128 slots); SIMD16 works on register pairs, hence 64 slots — this
+ // must match getIndex(), which divides physical.nr by 2 for SIMD16
+ const uint32_t simdWidth = scheduler.ctx.getSimdWidth();
+ GBE_ASSERT(simdWidth == 8 || simdWidth == 16);
+ this->grfNum = simdWidth == 8 ? 128 : 64;
+ nodes.resize(grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_MEM_SYSTEM);
+ }
insnNodes.resize(selection.getLargestBlockSize());
}
}
uint32_t DependencyTracker::getIndex(GenRegister reg) const {
+ // Non GRF physical register
if (reg.physical) {
GBE_ASSERT (reg.file == GEN_ARCHITECTURE_REGISTER_FILE);
const uint32_t file = reg.nr & 0xf0;
if (file == GEN_ARF_FLAG) {
const uint32_t subnr = reg.subnr / sizeof(uint16_t);
GBE_ASSERT(nr < MAX_FLAG_REGISTER && (subnr == 0 || subnr == 1));
+ // NOTE(review): this computes grfNum + 2*nr + subnr while the
+ // constructor reserves only MAX_FLAG_REGISTER slots for flags — this is
+ // only in bounds if MAX_FLAG_REGISTER counts (nr, subnr) pairs rather
+ // than flag registers; confirm against its definition
- return virtualNum + 2*nr + subnr;
+ return grfNum + 2*nr + subnr;
} else if (file == GEN_ARF_ACCUMULATOR) {
GBE_ASSERT(nr < MAX_ACC_REGISTER);
- return virtualNum + MAX_FLAG_REGISTER + nr;
+ return grfNum + MAX_FLAG_REGISTER + nr;
} else {
NOT_SUPPORTED;
return 0;
}
- } else
+ }
+ // We directly manipulate physical GRFs here
+ else if (scheduler.policy == POST_ALLOC) {
+ // Map the virtual register to its allocated physical GRF; for SIMD16
+ // a pair of GRFs shares one tracking slot (nr / 2). Sub-register
+ // offsets are ignored, so distinct parts of one GRF alias the same
+ // slot — presumably a conservative over-approximation; confirm
+ const GenRegister physical = scheduler.ctx.ra->genReg(reg);
+ const uint32_t simdWidth = scheduler.ctx.getSimdWidth();
+ return simdWidth == 8 ? physical.nr : physical.nr / 2;
+ }
+ // We use virtual registers since allocation is not done yet
+ else
return reg.value.reg;
}
uint32_t DependencyTracker::getIndex(uint32_t bti) const {
+ // Memory sub-system slots live past all register slots in `nodes`
- const uint32_t memDelta = virtualNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER;
+ const uint32_t memDelta = grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER;
+ // bti 0xfe is treated as the local memory sub-system, anything else as global
return bti == 0xfe ? memDelta + LOCAL_MEMORY : memDelta + GLOBAL_MEMORY;
}
return 0;
}
- SelectionScheduler::SelectionScheduler(GenContext &ctx, Selection &selection) :
- listPool(nextHighestPowerOf2(selection.getLargestBlockSize())),
+ // NOTE(review): `tracker` reads scheduler.policy in its constructor, so
+ // `policy` must be declared before `tracker` in the class for the member
+ // initialization order to be safe — confirm the declaration order
+ SelectionScheduler::SelectionScheduler(GenContext &ctx,
+ Selection &selection,
+ SchedulePolicy policy) :
+ policy(policy), listPool(nextHighestPowerOf2(selection.getLargestBlockSize())),
ctx(ctx), selection(selection), tracker(selection, *this)
{
this->clearLists();
}
// Try to schedule something from the ready list
- auto toSchedule = this->ready.begin();
+ intrusive_list<ScheduleListNode>::iterator toSchedule;
+ if (policy == POST_ALLOC) // FIFO scheduling
+ toSchedule = this->ready.begin();
+ else // LIFO scheduling
+ toSchedule = this->ready.rbegin();
+ // toSchedule = this->ready.begin();
+
if (toSchedule != this->ready.end()) {
cycle += getThroughputGen7(toSchedule->node->insn, isSIMD8);
this->ready.erase(toSchedule);
}
}
- BVAR(OCL_SCHEDULE_INSN, true);
+ // Environment-controlled switches, one per scheduling pass
+ BVAR(OCL_POST_ALLOC_INSN_SCHEDULE, true);
+ BVAR(OCL_PRE_ALLOC_INSN_SCHEDULE, true);
+
+ // Latency-oriented pass (POST_ALLOC i.e. FIFO policy), run after register
+ // allocation. Rebuilds the dependency DAG per block, empties the block's
+ // instruction list and refills it in the scheduled order
+ void schedulePostRegAllocation(GenContext &ctx, Selection &selection) {
+ if (OCL_POST_ALLOC_INSN_SCHEDULE) {
+ SelectionScheduler scheduler(ctx, selection, POST_ALLOC);
+ for (auto &bb : *selection.blockList) {
+ const int32_t insnNum = scheduler.buildDAG(bb);
+ bb.insnList.clear();
+ scheduler.scheduleDAG(bb, insnNum);
+ }
+ }
+ }
void schedulePreRegAllocation(GenContext &ctx, Selection &selection) {
- if (OCL_SCHEDULE_INSN) {
- SelectionScheduler scheduler(ctx, selection);
+ if (OCL_PRE_ALLOC_INSN_SCHEDULE) {
+ SelectionScheduler scheduler(ctx, selection, PRE_ALLOC);
for (auto &bb : *selection.blockList) {
const int32_t insnNum = scheduler.buildDAG(bb);
bb.insnList.clear();
class Selection; // Pre ISA code
class GenContext; // Handle compilation for Gen
- /*! Schedule the code per basic block */
+ /*! Schedule the code per basic block (tends to limit register number) */
void schedulePreRegAllocation(GenContext &ctx, Selection &selection);
+ /*! Schedule the code per basic block (tends to deal with insn latency) */
+ void schedulePostRegAllocation(GenContext &ctx, Selection &selection);
+
} /* namespace gbe */
#endif /* __GBE_GEN_INSN_SCHEDULING_HPP__ */
}
SelectionLibrary::SelectionLibrary(void) {
- /*! Force MAD pattern */
- BVAR(OCL_FORCE_MAD_PATTERN, false);
-
this->insert<UnaryInstructionPattern>();
this->insert<BinaryInstructionPattern>();
this->insert<TernaryInstructionPattern>();
this->insert<BranchInstructionPattern>();
this->insert<Int32x32MulInstructionPattern>();
this->insert<Int32x16MulInstructionPattern>();
+ // The MAD selection pattern is now always registered (the
+ // OCL_FORCE_MAD_PATTERN switch is removed), so mul+add sequences are
+ // fused whenever this pattern matches
+ this->insert<MulAddInstructionPattern>();
// Sort all the patterns with the number of instructions they output
for (uint32_t op = 0; op < ir::OP_INVALID; ++op)
GenProgram::GenProgram(void) {}
GenProgram::~GenProgram(void) {}
+ /*! We must avoid spilling at all cost with Gen. The strategies are tried
+ * in order by compileKernel: widest SIMD first, then the same width with
+ * register pressure limitation, then the narrower width. Entries [0,1]
+ * are SIMD16 and entries [2,3] are SIMD8 — compileKernel indexes into
+ * this table, so keep the order in sync with it
+ */
+ static const struct CodeGenStrategy {
+ uint32_t simdWidth;
+ bool limitRegisterPressure;
+ } codeGenStrategy[] = {
+ {16,false},
+ {16,true},
+ {8,false},
+ {8,true},
+ };
+
+
Kernel *GenProgram::compileKernel(const ir::Unit &unit, const std::string &name) {
- Context *ctx = GBE_NEW(GenContext, unit, name);
- Kernel *kernel = ctx->compileKernel();
-
- // register allocation may fail. We may need to recompile in that case
- if (kernel == NULL) {
- GBE_SAFE_DELETE(ctx);
- unit.getFunction(name)->setSimdWidth(8);
- ctx = GBE_NEW(GenContext, unit, name);
+
+ // Be careful when the simdWidth is forced by the programmer. We can see it
+ // when the function already provides the simd width we need to use (i.e.
+ // non zero)
+ const ir::Function *fn = unit.getFunction(name);
+ // Start at the first codeGenStrategy entry matching the forced width:
+ // SIMD8 entries begin at index 2, everything else starts at index 0
+ uint32_t codeGen = fn->getSimdWidth() == 8 ? 2 : 0;
+ // When a width is forced, only try the two strategies of that width
+ // (codeGen and codeGen+1); otherwise the whole table. Using codeGen + 2
+ // (not a fixed 2) is required: with a forced SIMD8 the start index is 2,
+ // and a fixed bound of 2 would make the loop below run zero times and
+ // always return a NULL kernel
+ const uint32_t codeGenNum = fn->getSimdWidth() != 0 ? codeGen + 2 : 4;
+ Kernel *kernel = NULL;
+
+ // Stop when compilation is successful
+ for (; codeGen < codeGenNum; ++codeGen) {
+ const uint32_t simdWidth = codeGenStrategy[codeGen].simdWidth;
+ const bool limitRegisterPressure = codeGenStrategy[codeGen].limitRegisterPressure;
+
+ // Force the SIMD width now and try to compile
+ unit.getFunction(name)->setSimdWidth(simdWidth);
+ Context *ctx = GBE_NEW(GenContext, unit, name, limitRegisterPressure);
kernel = ctx->compileKernel();
- GBE_ASSERT(kernel != NULL); // XXX spill must be implemented
+ GBE_DELETE(ctx);
+ if (kernel != NULL)
+ break;
}
- GBE_DELETE(ctx);
+
+ // XXX spill must be implemented
+ GBE_ASSERTM(kernel != NULL, "Register spilling not supported yet!");
return kernel;
}
#include "ir/profile.hpp"
#include "ir/function.hpp"
#include "backend/gen_insn_selection.hpp"
-#include "backend/gen_insn_scheduling.hpp"
#include "backend/gen_reg_allocation.hpp"
#include "backend/gen_register.hpp"
#include "backend/program.hpp"
// Allocate all the vectors first since they need to be contiguous
this->allocateVector(selection);
- schedulePreRegAllocation(ctx, selection);
+ // Pre-allocation scheduling is now driven from GenContext::emitCode
+ // (before the allocator is invoked), so it is no longer triggered here
// Now start the linear scan allocation
for (uint32_t regID = 0; regID < ctx.sel->getRegNum(); ++regID)
#define sin native_sin
#define pow powr
-PURE CONST OVERLOADABLE float mad(float a, float b, float c);
+//PURE CONST OVERLOADABLE float mad(float a, float b, float c);
+INLINE OVERLOADABLE float mad(float a, float b, float c) {
+ return a*b+c;
+}
INLINE OVERLOADABLE uint select(uint src0, uint src1, uint cond) {
return cond ? src1 : src0;
"#define sin native_sin\n"
"#define pow powr\n"
"\n"
-"PURE CONST OVERLOADABLE float mad(float a, float b, float c);\n"
+"//PURE CONST OVERLOADABLE float mad(float a, float b, float c);\n"
+"INLINE OVERLOADABLE float mad(float a, float b, float c) {\n"
+" return a*b+c;\n"
+"}\n"
"\n"
"INLINE OVERLOADABLE uint select(uint src0, uint src1, uint cond) {\n"
" return cond ? src1 : src0;\n"
DECL_SHADER_TOY_TEST(dim,dim,compiler_ribbon);
DECL_SHADER_TOY_TEST(dim,dim,compiler_chocolux);
DECL_SHADER_TOY_TEST(dim,dim,compiler_nautilus);
-DECL_SHADER_TOY_TEST(dim,dim,compiler_menger_sponge);
+// NOTE(review): test disabled by this change with no stated reason —
+// presumably it regressed or became too slow with the scheduling/MAD
+// updates; confirm and re-enable or track it in a bug
+// DECL_SHADER_TOY_TEST(dim,dim,compiler_menger_sponge);
DECL_SHADER_TOY_TEST(dim,dim,compiler_julia);
#undef DECL_SHADER_TOY_TEST