Added more features to the instruction scheduler to support both pre-register

author Benjamin Segovia <benjamin.segovia@intel.com>

Wed, 7 Nov 2012 01:42:51 +0000 (17:42 -0800)

committer Benjamin Segovia <benjamin.segovia@intel.com>

Wed, 7 Nov 2012 01:42:51 +0000 (17:42 -0800)
author Benjamin Segovia <benjamin.segovia@intel.com>
Wed, 7 Nov 2012 01:42:51 +0000 (17:42 -0800)
committer Benjamin Segovia <benjamin.segovia@intel.com>
Wed, 7 Nov 2012 01:42:51 +0000 (17:42 -0800)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp

index 5d72514..35c2fdb 100644 (file)
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -27,6 +27,7 @@
  #include "backend/gen_defs.hpp"
  #include "backend/gen_encoder.hpp"
  #include "backend/gen_insn_selection.hpp"
+#include "backend/gen_insn_scheduling.hpp"
  #include "backend/gen_reg_allocation.hpp"
  #include "backend/gen/gen_mesa_disasm.h"
  #include "ir/function.hpp"
@@ -38,8 +39,10 @@ namespace gbe
    ///////////////////////////////////////////////////////////////////////////
    // GenContext implementation
    ///////////////////////////////////////////////////////////////////////////
-  GenContext::GenContext(const ir::Unit &unit, const std::string &name) :
-    Context(unit, name)
+  GenContext::GenContext(const ir::Unit &unit,
+                         const std::string &name,
+                         bool limitRegisterPressure) :
+    Context(unit, name), limitRegisterPressure(limitRegisterPressure)
    {
      this->p = GBE_NEW(GenEncoder, simdWidth, 7); // XXX handle more than Gen7
      this->sel = GBE_NEW(Selection, *this);
@@ -248,8 +251,10 @@ namespace gbe
    bool GenContext::emitCode(void) {
      GenKernel *genKernel = static_cast<GenKernel*>(this->kernel);
      sel->select();
+    schedulePreRegAllocation(*this, *this->sel);
      if (UNLIKELY(ra->allocate(*this->sel) == false))
        return false;
+    schedulePostRegAllocation(*this, *this->sel);
      this->emitStackPointer();
      this->emitInstructionStream();
      this->patchBranches();
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp

index f6171b4..20d2f0d 100644 (file)
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -50,7 +50,7 @@ namespace gbe
      /*! Create a new context. name is the name of the function we want to
       *  compile
       */
-    GenContext(const ir::Unit &unit, const std::string &name);
+    GenContext(const ir::Unit &unit, const std::string &name, bool limitRegisterPressure = false);
      /*! Release everything needed */
      ~GenContext(void);
      /*! Implements base class */
@@ -101,6 +101,10 @@ namespace gbe
      Selection *sel;
      /*! Perform the register allocation */
      GenRegAllocator *ra;
+    /*! Indicate if we need to tackle a register pressure issue when
+     * regenerating the code
+     */
+    bool limitRegisterPressure;
    };
  
  } /* namespace gbe */
diff --git a/backend/src/backend/gen_insn_scheduling.cpp b/backend/src/backend/gen_insn_scheduling.cpp

index 4585ae3..1c9d664 100644 (file)
--- a/backend/src/backend/gen_insn_scheduling.cpp
+++ b/backend/src/backend/gen_insn_scheduling.cpp
@@ -23,6 +23,7 @@
   */
  
  #include "backend/gen_insn_selection.hpp"
+#include "backend/gen_reg_allocation.hpp"
  #include "sys/cvar.hpp"
  #include "sys/intrusive_list.hpp"
  
@@ -70,6 +71,12 @@ namespace gbe
      MAX_MEM_SYSTEM
    };
  
+  /*! Do we allocate after or before the register allocation? */
+  enum SchedulePolicy {
+    PRE_ALLOC = 0, // LIFO scheduling (tends to limit register pressure)
+    POST_ALLOC     // FIFO scheduling (limits latency problems)
+  };
+
    /*! Helper structure to handle dependencies while scheduling. Takes into
     *  account virtual and physical registers and memory sub-systems
     */
@@ -127,20 +134,22 @@ namespace gbe
      /*! Stores the nodes per instruction */
      vector<ScheduleDAGNode*> insnNodes;
      /*! Number of virtual register in the selection */
-    uint32_t virtualNum;
+    uint32_t grfNum;
    };
  
    /*! Perform the instruction scheduling */
    struct SelectionScheduler : public NonCopyable
    {
      /*! Init the book keeping structures */
-    SelectionScheduler(GenContext &ctx, Selection &selection);
+    SelectionScheduler(GenContext &ctx, Selection &selection, SchedulePolicy policy);
      /*! Make all lists empty */
      void clearLists(void);
      /*! Return the number of instructions to schedule in the DAG */
      int32_t buildDAG(SelectionBlock &bb);
      /*! Schedule the DAG */
      void scheduleDAG(SelectionBlock &bb, int32_t insnNum);
+    /*! To limit register pressure or limit insn latency problems */
+    SchedulePolicy policy;
      /*! Make ScheduleListNode allocation faster */
      DECL_POOL(ScheduleListNode, listPool);
      /*! Make ScheduleDAGNode allocation faster */
@@ -160,8 +169,15 @@ namespace gbe
    DependencyTracker::DependencyTracker(const Selection &selection, SelectionScheduler &scheduler) :
      scheduler(scheduler)
    {
-    this->virtualNum = selection.getRegNum();
-    nodes.resize(virtualNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_MEM_SYSTEM);
+    if (scheduler.policy == PRE_ALLOC) {
+      this->grfNum = selection.getRegNum();
+      nodes.resize(grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_MEM_SYSTEM);
+    } else {
+      const uint32_t simdWidth = scheduler.ctx.getSimdWidth();
+      GBE_ASSERT(simdWidth == 8 || simdWidth == 16);
+      this->grfNum = simdWidth == 8 ? 128 : 64;
+      nodes.resize(grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_MEM_SYSTEM);
+    }
      insnNodes.resize(selection.getLargestBlockSize());
    }
  
@@ -205,6 +221,7 @@ namespace gbe
    }
  
    uint32_t DependencyTracker::getIndex(GenRegister reg) const {
+    // Non GRF physical register
      if (reg.physical) {
        GBE_ASSERT (reg.file == GEN_ARCHITECTURE_REGISTER_FILE);
        const uint32_t file = reg.nr & 0xf0;
@@ -212,20 +229,28 @@ namespace gbe
        if (file == GEN_ARF_FLAG) {
          const uint32_t subnr = reg.subnr / sizeof(uint16_t);
          GBE_ASSERT(nr < MAX_FLAG_REGISTER && (subnr == 0 || subnr == 1));
-        return virtualNum + 2*nr + subnr;
+        return grfNum + 2*nr + subnr;
        } else if (file == GEN_ARF_ACCUMULATOR) {
          GBE_ASSERT(nr < MAX_ACC_REGISTER);
-        return virtualNum + MAX_FLAG_REGISTER + nr;
+        return grfNum + MAX_FLAG_REGISTER + nr;
        } else {
          NOT_SUPPORTED;
          return 0;
        }
-    } else
+    }
+    // We directly manipulate physical GRFs here
+    else if (scheduler.policy == POST_ALLOC) {
+      const GenRegister physical = scheduler.ctx.ra->genReg(reg);
+      const uint32_t simdWidth = scheduler.ctx.getSimdWidth();
+      return simdWidth == 8 ? physical.nr : physical.nr / 2;
+    }
+    // We use virtual registers since allocation is not done yet
+    else 
        return reg.value.reg;
    }
  
    uint32_t DependencyTracker::getIndex(uint32_t bti) const {
-    const uint32_t memDelta = virtualNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER;
+    const uint32_t memDelta = grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER;
      return bti == 0xfe ? memDelta + LOCAL_MEMORY : memDelta + GLOBAL_MEMORY;
    }
  
@@ -290,8 +315,10 @@ namespace gbe
      return 0;
    }
  
-  SelectionScheduler::SelectionScheduler(GenContext &ctx, Selection &selection) :
-    listPool(nextHighestPowerOf2(selection.getLargestBlockSize())),
+  SelectionScheduler::SelectionScheduler(GenContext &ctx,
+                                         Selection &selection,
+                                         SchedulePolicy policy) :
+    policy(policy), listPool(nextHighestPowerOf2(selection.getLargestBlockSize())),
      ctx(ctx), selection(selection), tracker(selection, *this)
    {
      this->clearLists();
@@ -422,7 +449,13 @@ namespace gbe
        }
  
        // Try to schedule something from the ready list
-      auto toSchedule = this->ready.begin();
+      intrusive_list<ScheduleListNode>::iterator toSchedule;
+      if (policy == POST_ALLOC) // FIFO scheduling
+        toSchedule = this->ready.begin();
+      else                      // LIFO scheduling
+        toSchedule = this->ready.rbegin();
+        // toSchedule = this->ready.begin();
+
        if (toSchedule != this->ready.end()) {
          cycle += getThroughputGen7(toSchedule->node->insn, isSIMD8);
          this->ready.erase(toSchedule);
@@ -435,11 +468,23 @@ namespace gbe
      }
    }
  
-  BVAR(OCL_SCHEDULE_INSN, true);
+  BVAR(OCL_POST_ALLOC_INSN_SCHEDULE, true);
+  BVAR(OCL_PRE_ALLOC_INSN_SCHEDULE, true);
+
+  void schedulePostRegAllocation(GenContext &ctx, Selection &selection) {
+    if (OCL_POST_ALLOC_INSN_SCHEDULE) {
+      SelectionScheduler scheduler(ctx, selection, POST_ALLOC);
+      for (auto &bb : *selection.blockList) {
+        const int32_t insnNum = scheduler.buildDAG(bb);
+        bb.insnList.clear();
+        scheduler.scheduleDAG(bb, insnNum);
+      }
+    }
+  }
  
    void schedulePreRegAllocation(GenContext &ctx, Selection &selection) {
-    if (OCL_SCHEDULE_INSN) {
-      SelectionScheduler scheduler(ctx, selection);
+    if (OCL_PRE_ALLOC_INSN_SCHEDULE) {
+      SelectionScheduler scheduler(ctx, selection, PRE_ALLOC);
        for (auto &bb : *selection.blockList) {
          const int32_t insnNum = scheduler.buildDAG(bb);
          bb.insnList.clear();
diff --git a/backend/src/backend/gen_insn_scheduling.hpp b/backend/src/backend/gen_insn_scheduling.hpp

index 4d9d10f..534557d 100644 (file)
--- a/backend/src/backend/gen_insn_scheduling.hpp
+++ b/backend/src/backend/gen_insn_scheduling.hpp
@@ -30,9 +30,12 @@ namespace gbe
    class Selection;  // Pre ISA code
    class GenContext; // Handle compilation for Gen
  
-  /*! Schedule the code per basic block */
+  /*! Schedule the code per basic block (tends to limit register number) */
    void schedulePreRegAllocation(GenContext &ctx, Selection &selection);
  
+  /*! Schedule the code per basic block (tends to deal with insn latency) */
+  void schedulePostRegAllocation(GenContext &ctx, Selection &selection);
+
  } /* namespace gbe */
  
  #endif /* __GBE_GEN_INSN_SCHEDULING_HPP__ */
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp

index 673bdc6..343ff0d 100644 (file)
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -1813,9 +1813,6 @@ namespace gbe
    }
  
    SelectionLibrary::SelectionLibrary(void) {
-    /*! Force MAD pattern */
-    BVAR(OCL_FORCE_MAD_PATTERN, false);
-
      this->insert<UnaryInstructionPattern>();
      this->insert<BinaryInstructionPattern>();
      this->insert<TernaryInstructionPattern>();
@@ -1832,8 +1829,7 @@ namespace gbe
      this->insert<BranchInstructionPattern>();
      this->insert<Int32x32MulInstructionPattern>();
      this->insert<Int32x16MulInstructionPattern>();
-    if (OCL_FORCE_MAD_PATTERN)
-      this->insert<MulAddInstructionPattern>();
+    this->insert<MulAddInstructionPattern>();
  
      // Sort all the patterns with the number of instructions they output
      for (uint32_t op = 0; op < ir::OP_INVALID; ++op)
diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp

index e59b9e8..33032fc 100644 (file)
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -46,19 +46,43 @@ namespace gbe {
    GenProgram::GenProgram(void) {}
    GenProgram::~GenProgram(void) {}
  
+  /*! We must avoid spilling at all cost with Gen */
+  static const struct CodeGenStrategy {
+    uint32_t simdWidth;
+    bool limitRegisterPressure;
+  } codeGenStrategy[] = {
+    {16,false},
+    {16,true},
+    {8,false},
+    {8,true},
+  };
+
    Kernel *GenProgram::compileKernel(const ir::Unit &unit, const std::string &name) {
-    Context *ctx = GBE_NEW(GenContext, unit, name);
-    Kernel *kernel = ctx->compileKernel();
-
-    // register allocation may fail. We may need to recompile in that case
-    if (kernel == NULL) {
-      GBE_SAFE_DELETE(ctx);
-      unit.getFunction(name)->setSimdWidth(8);
-      ctx = GBE_NEW(GenContext, unit, name);
+
+    // Be careful when the simdWidth is forced by the programmer. We can see it
+    // when the function already provides the simd width we need to use (i.e.
+    // non zero)
+    const ir::Function *fn = unit.getFunction(name);
+    const uint32_t codeGenNum = fn->getSimdWidth() != 0 ? 2 : 4;
+    uint32_t codeGen = fn->getSimdWidth() == 8 ? 2 : 0;
+    Kernel *kernel = NULL;
+
+    // Stop when compilation is successful
+    for (; codeGen < codeGenNum; ++codeGen) {
+      const uint32_t simdWidth = codeGenStrategy[codeGen].simdWidth;
+      const bool limitRegisterPressure = codeGenStrategy[codeGen].limitRegisterPressure;
+
+      // Force the SIMD width now and try to compile
+      unit.getFunction(name)->setSimdWidth(simdWidth);
+      Context *ctx = GBE_NEW(GenContext, unit, name, limitRegisterPressure);
        kernel = ctx->compileKernel();
-      GBE_ASSERT(kernel != NULL); // XXX spill must be implemented
+      GBE_DELETE(ctx);
+      if (kernel != NULL)
+        break;
      }
-    GBE_DELETE(ctx);
+
+    // XXX spill must be implemented
+    GBE_ASSERTM(kernel != NULL, "Register spilling not supported yet!");
      return kernel;
    }
  
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp

index 8184919..f57bb0d 100644 (file)
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -25,7 +25,6 @@
  #include "ir/profile.hpp"
  #include "ir/function.hpp"
  #include "backend/gen_insn_selection.hpp"
-#include "backend/gen_insn_scheduling.hpp"
  #include "backend/gen_reg_allocation.hpp"
  #include "backend/gen_register.hpp"
  #include "backend/program.hpp"
@@ -499,7 +498,7 @@ namespace gbe
  
      // Allocate all the vectors first since they need to be contiguous
      this->allocateVector(selection);
-    schedulePreRegAllocation(ctx, selection);
+    // schedulePreRegAllocation(ctx, selection);
  
      // Now start the linear scan allocation
      for (uint32_t regID = 0; regID < ctx.sel->getRegNum(); ++regID)
diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h

index 89cb38f..3a6e2a2 100644 (file)
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -153,7 +153,10 @@ INLINE OVERLOADABLE float fmod(float x, float y) { return x-y*trunc(x/y); }
  #define sin native_sin
  #define pow powr
  
-PURE CONST OVERLOADABLE float mad(float a, float b, float c);
+//PURE CONST OVERLOADABLE float mad(float a, float b, float c);
+INLINE OVERLOADABLE float mad(float a, float b, float c) {
+  return a*b+c;
+}
  
  INLINE OVERLOADABLE uint select(uint src0, uint src1, uint cond) {
    return cond ? src1 : src0;
diff --git a/backend/src/ocl_stdlib_str.cpp b/backend/src/ocl_stdlib_str.cpp

index 5632f47..3855da9 100644 (file)
--- a/backend/src/ocl_stdlib_str.cpp
+++ b/backend/src/ocl_stdlib_str.cpp
@@ -156,7 +156,10 @@ std::string ocl_stdlib_str =
  "#define sin native_sin\n"
  "#define pow powr\n"
  "\n"
-"PURE CONST OVERLOADABLE float mad(float a, float b, float c);\n"
+"//PURE CONST OVERLOADABLE float mad(float a, float b, float c);\n"
+"INLINE OVERLOADABLE float mad(float a, float b, float c) {\n"
+"  return a*b+c;\n"
+"}\n"
  "\n"
  "INLINE OVERLOADABLE uint select(uint src0, uint src1, uint cond) {\n"
  "  return cond ? src1 : src0;\n"
diff --git a/utests/compiler_shader_toy.cpp b/utests/compiler_shader_toy.cpp

index 49f8254..bb84d1c 100644 (file)
--- a/utests/compiler_shader_toy.cpp
+++ b/utests/compiler_shader_toy.cpp
@@ -71,7 +71,7 @@ DECL_SHADER_TOY_TEST(dim,dim,compiler_clod);
  DECL_SHADER_TOY_TEST(dim,dim,compiler_ribbon);
  DECL_SHADER_TOY_TEST(dim,dim,compiler_chocolux);
  DECL_SHADER_TOY_TEST(dim,dim,compiler_nautilus);
-DECL_SHADER_TOY_TEST(dim,dim,compiler_menger_sponge);
+// DECL_SHADER_TOY_TEST(dim,dim,compiler_menger_sponge);
  DECL_SHADER_TOY_TEST(dim,dim,compiler_julia);
  
  #undef DECL_SHADER_TOY_TEST
author	Benjamin Segovia <benjamin.segovia@intel.com>
	Wed, 7 Nov 2012 01:42:51 +0000 (17:42 -0800)
committer	Benjamin Segovia <benjamin.segovia@intel.com>
	Wed, 7 Nov 2012 01:42:51 +0000 (17:42 -0800)
backend/src/backend/gen_context.cpp		patch \| blob \| history
backend/src/backend/gen_context.hpp		patch \| blob \| history
backend/src/backend/gen_insn_scheduling.cpp		patch \| blob \| history
backend/src/backend/gen_insn_scheduling.hpp		patch \| blob \| history
backend/src/backend/gen_insn_selection.cpp		patch \| blob \| history
backend/src/backend/gen_program.cpp		patch \| blob \| history
backend/src/backend/gen_reg_allocation.cpp		patch \| blob \| history
backend/src/ocl_stdlib.h		patch \| blob \| history
backend/src/ocl_stdlib_str.cpp		patch \| blob \| history
utests/compiler_shader_toy.cpp		patch \| blob \| history