GBE: refactor the curbe register payload allocation.

author Zhigang Gong <zhigang.gong@linux.intel.com>

Wed, 25 Sep 2013 03:20:45 +0000 (11:20 +0800)

committer Zhigang Gong <zhigang.gong@linux.intel.com>

Thu, 26 Sep 2013 08:55:52 +0000 (16:55 +0800)
author Zhigang Gong <zhigang.gong@linux.intel.com>
Wed, 25 Sep 2013 03:20:45 +0000 (11:20 +0800)
committer Zhigang Gong <zhigang.gong@linux.intel.com>
Thu, 26 Sep 2013 08:55:52 +0000 (16:55 +0800)
diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp

index a55ef04..2cacd07 100644 (file)
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -366,7 +366,7 @@ namespace gbe
      this->kernel->stackSize = 1*KB; // XXX compute that in a better way
    }
  
-  void Context::newCurbeEntry(gbe_curbe_type value,
+  uint32_t Context::newCurbeEntry(gbe_curbe_type value,
                                uint32_t subValue,
                                uint32_t size,
                                uint32_t alignment)
@@ -376,6 +376,7 @@ namespace gbe
      GBE_ASSERT(offset >= GEN_REG_SIZE);
      kernel->patches.push_back(PatchInfo(value, subValue, offset - GEN_REG_SIZE));
      kernel->curbeSize = std::max(kernel->curbeSize, offset + size - GEN_REG_SIZE);
+    return offset;
    }
  
    uint32_t Context::getImageInfoCurbeOffset(ir::ImageInfoKey key, size_t size)
@@ -392,12 +393,17 @@ namespace gbe
      return offset;
    }
  
+
+  void Context::insertCurbeReg(ir::Register reg, uint32_t offset) {
+    curbeRegs.insert(std::make_pair(reg, offset));
+  }
+
    void Context::buildPatchList(void) {
      const uint32_t ptrSize = unit.getPointerSize() == ir::POINTER_32_BITS ? 4u : 8u;
      kernel->curbeSize = 0u;
  
      // We insert the block IP mask first
-    this->newCurbeEntry(GBE_CURBE_BLOCK_IP, 0, this->simdWidth*sizeof(uint16_t));
+    this->insertCurbeReg(ir::ocl::blockip, this->newCurbeEntry(GBE_CURBE_BLOCK_IP, 0, this->simdWidth*sizeof(uint16_t)));
  
      // Go over the arguments and find the related patch locations
      const uint32_t argNum = fn.argNum();
@@ -411,28 +417,22 @@ namespace gbe
            arg.type == ir::FunctionArgument::STRUCTURE ||
            arg.type == ir::FunctionArgument::IMAGE ||
            arg.type == ir::FunctionArgument::SAMPLER)
-        this->newCurbeEntry(GBE_CURBE_KERNEL_ARGUMENT, argID, arg.size, ptrSize);
+        this->insertCurbeReg(arg.reg, this->newCurbeEntry(GBE_CURBE_KERNEL_ARGUMENT, argID, arg.size, ptrSize));
      }
  
      // Already inserted registers go here
-    set<ir::Register> specialRegs;
-
      const size_t localIDSize = sizeof(uint32_t) * this->simdWidth;
-    this->newCurbeEntry(GBE_CURBE_LOCAL_ID_X, 0, localIDSize);
-    this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Y, 0, localIDSize);
-    this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Z, 0, localIDSize);
-    this->newCurbeEntry(GBE_CURBE_SAMPLER_INFO, 0, 32);
-    specialRegs.insert(ir::ocl::lid0);
-    specialRegs.insert(ir::ocl::lid1);
-    specialRegs.insert(ir::ocl::lid2);
-    specialRegs.insert(ir::ocl::samplerinfo);
+    insertCurbeReg(ir::ocl::lid0, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_X, 0, localIDSize));
+    insertCurbeReg(ir::ocl::lid1, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Y, 0, localIDSize));
+    insertCurbeReg(ir::ocl::lid2, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Z, 0, localIDSize));
+    insertCurbeReg(ir::ocl::samplerinfo, this->newCurbeEntry(GBE_CURBE_SAMPLER_INFO, 0, 32));
  
      // Go over all the instructions and find the special register we need
      // to push
  #define INSERT_REG(SPECIAL_REG, PATCH, WIDTH) \
    if (reg == ir::ocl::SPECIAL_REG) { \
-    if (specialRegs.find(reg) != specialRegs.end()) continue; \
-    this->newCurbeEntry(GBE_CURBE_##PATCH, 0, ptrSize * WIDTH); \
+    if (curbeRegs.find(reg) != curbeRegs.end()) continue; \
+    insertCurbeReg(reg, this->newCurbeEntry(GBE_CURBE_##PATCH, 0, ptrSize * WIDTH)); \
    } else
  
      bool useStackPtr = false;
@@ -441,7 +441,7 @@ namespace gbe
        for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
          const ir::Register reg = insn.getSrc(srcID);
          if (fn.isSpecialReg(reg) == false) continue;
-        if (specialRegs.contains(reg) == true) continue;
+        if (curbeRegs.find(reg) != curbeRegs.end()) continue;
          if (reg == ir::ocl::stackptr) useStackPtr = true;
          INSERT_REG(lsize0, LOCAL_SIZE_X, 1)
          INSERT_REG(lsize1, LOCAL_SIZE_Y, 1)
@@ -458,17 +458,16 @@ namespace gbe
          INSERT_REG(numgroup2, GROUP_NUM_Z, 1)
          INSERT_REG(stackptr, STACK_POINTER, this->simdWidth)
          do {} while (0);
-        specialRegs.insert(reg);
        }
      });
  #undef INSERT_REG
  
      // Insert the number of threads
-    this->newCurbeEntry(GBE_CURBE_THREAD_NUM, 0, sizeof(uint32_t));
+    insertCurbeReg(ir::ocl::threadn, this->newCurbeEntry(GBE_CURBE_THREAD_NUM, 0, sizeof(uint32_t)));
  
      // Insert the stack buffer if used
      if (useStackPtr)
-      this->newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER, ptrSize);
+      insertCurbeReg(ir::ocl::stackptr, this->newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER, ptrSize));
  
      // After this point the vector is immutable. Sorting it will make
      // research faster
diff --git a/backend/src/backend/context.hpp b/backend/src/backend/context.hpp

index 50c0e70..ca2c88d 100644 (file)
--- a/backend/src/backend/context.hpp
+++ b/backend/src/backend/context.hpp
@@ -93,6 +93,8 @@ namespace gbe
      uint32_t getImageInfoCurbeOffset(ir::ImageInfoKey key, size_t size);
      /*! allocate size scratch memory and return start address */
      uint32_t allocateScratchMem(uint32_t size);
+    /*! Preallocated curbe register set including special registers. */
+    map<ir::Register, uint32_t> curbeRegs;
    protected:
      /*! Build the instruction stream. Return false if failed */
      virtual bool emitCode(void) = 0;
@@ -115,7 +117,8 @@ namespace gbe
      /*! Insert a new entry with the given size in the Curbe. Return the offset
       *  of the entry
       */
-    void newCurbeEntry(gbe_curbe_type value, uint32_t subValue, uint32_t size, uint32_t alignment = 0);
+    uint32_t newCurbeEntry(gbe_curbe_type value, uint32_t subValue, uint32_t size, uint32_t alignment = 0);
+    void insertCurbeReg(ir::Register, uint32_t grfOffset); //!< Record the curbe offset preallocated for a register
      /*! Provide for each branch and label the label index target */
      typedef map<const ir::Instruction*, ir::LabelIndex> JIPMap;
      const ir::Unit &unit;                 //!< Unit that contains the kernel
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp

index a9132df..a72333d 100644 (file)
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -65,8 +65,10 @@ namespace gbe
      void allocateFlags(Selection &selection);
      /*! Allocate the GRF registers */
      bool allocateGRFs(Selection &selection);
+    /*! Create gen registers for all preallocated curbe registers. */
+    void allocatePayloadRegs(void);
      /*! Create a Gen register from a register set in the payload */
-    void allocatePayloadReg(gbe_curbe_type, ir::Register, uint32_t subValue = 0, uint32_t subOffset = 0);
+    void allocatePayloadReg(ir::Register, uint32_t offset, uint32_t subOffset = 0);
      /*! Create the intervals for each register */
      /*! Allocate the vectors detected in the instruction selection pass */
      void allocateVector(Selection &selection);
@@ -124,19 +126,37 @@ namespace gbe
    GenRegAllocator::Opaque::Opaque(GenContext &ctx) : ctx(ctx) {}
    GenRegAllocator::Opaque::~Opaque(void) {}
  
-  void GenRegAllocator::Opaque::allocatePayloadReg(gbe_curbe_type value,
-                                                   ir::Register reg,
-                                                   uint32_t subValue,
+  void GenRegAllocator::Opaque::allocatePayloadReg(ir::Register reg,
+                                                   uint32_t offset,
                                                     uint32_t subOffset)
    {
      using namespace ir;
-    const Kernel *kernel = ctx.getKernel();
-    const int32_t curbeOffset = kernel->getCurbeOffset(value, subValue);
-    if (curbeOffset >= 0) {
-      const uint32_t offset = GEN_REG_SIZE + curbeOffset + subOffset;
-      RA.insert(std::make_pair(reg, offset));
-      this->intervals[reg].minID = 0;
-      this->intervals[reg].maxID = 0;
+    GBE_ASSERT(offset >= GEN_REG_SIZE); // payload entries start after the first GRF
+    offset += subOffset; // position of the register inside its curbe entry
+    GBE_ASSERT(reg != ocl::blockip || (offset % GEN_REG_SIZE == 0)); // validate before registering
+    RA.insert(std::make_pair(reg, offset));
+    this->intervals[reg].minID = 0;
+    this->intervals[reg].maxID = 0;
+  }
+
+  /*! Bind every preallocated curbe register, then every pushed register */
+  INLINE void GenRegAllocator::Opaque::allocatePayloadRegs(void) {
+    using namespace ir;
+    // Registers recorded in ctx.curbeRegs already carry their payload offset
+    for (const auto &curbeReg : this->ctx.curbeRegs)
+      allocatePayloadReg(curbeReg.first, curbeReg.second);
+    // Allocate all pushed registers (i.e. structure kernel arguments)
+    const Function &fn = ctx.getFunction();
+    GBE_ASSERT(fn.getProfile() == PROFILE_OCL);
+    const Function::PushMap &pushMap = fn.getPushMap();
+    for (const auto &pushed : pushMap) {
+      // Only arg.reg is read, so bind the argument by reference (no copy)
+      const FunctionArgument &arg = fn.getArg(pushed.second.argID);
+      const uint32_t subOffset = pushed.second.offset;
+      const Register reg = pushed.second.getRegister();
+      const auto it = this->ctx.curbeRegs.find(arg.reg);
+      GBE_ASSERT(it != ctx.curbeRegs.end()); // the argument must have a curbe entry
+      allocatePayloadReg(reg, it->second, subOffset);
      }
    }
  
@@ -535,11 +555,9 @@ namespace gbe
      }
      return true;
    }
+
    INLINE bool GenRegAllocator::Opaque::allocate(Selection &selection) {
      using namespace ir;
-    const Kernel *kernel = ctx.getKernel();
-    const Function &fn = ctx.getFunction();
-    GBE_ASSERT(fn.getProfile() == PROFILE_OCL);
      if (ctx.getSimdWidth() == 8) {
        reservedReg = ctx.allocate(RESERVED_REG_NUM_FOR_SPILL * GEN_REG_SIZE, GEN_REG_SIZE);
        reservedReg /= GEN_REG_SIZE;
@@ -555,25 +573,7 @@ namespace gbe
        this->intervals.push_back(ir::Register(regID));
  
      // Allocate the special registers (only those which are actually used)
-    allocatePayloadReg(GBE_CURBE_LOCAL_ID_X, ocl::lid0);
-    allocatePayloadReg(GBE_CURBE_LOCAL_ID_Y, ocl::lid1);
-    allocatePayloadReg(GBE_CURBE_LOCAL_ID_Z, ocl::lid2);
-    allocatePayloadReg(GBE_CURBE_LOCAL_SIZE_X, ocl::lsize0);
-    allocatePayloadReg(GBE_CURBE_LOCAL_SIZE_Y, ocl::lsize1);
-    allocatePayloadReg(GBE_CURBE_LOCAL_SIZE_Z, ocl::lsize2);
-    allocatePayloadReg(GBE_CURBE_GLOBAL_SIZE_X, ocl::gsize0);
-    allocatePayloadReg(GBE_CURBE_GLOBAL_SIZE_Y, ocl::gsize1);
-    allocatePayloadReg(GBE_CURBE_GLOBAL_SIZE_Z, ocl::gsize2);
-    allocatePayloadReg(GBE_CURBE_GLOBAL_OFFSET_X, ocl::goffset0);
-    allocatePayloadReg(GBE_CURBE_GLOBAL_OFFSET_Y, ocl::goffset1);
-    allocatePayloadReg(GBE_CURBE_GLOBAL_OFFSET_Z, ocl::goffset2);
-    allocatePayloadReg(GBE_CURBE_WORK_DIM, ocl::workdim);
-    allocatePayloadReg(GBE_CURBE_SAMPLER_INFO, ocl::samplerinfo);
-    allocatePayloadReg(GBE_CURBE_GROUP_NUM_X, ocl::numgroup0);
-    allocatePayloadReg(GBE_CURBE_GROUP_NUM_Y, ocl::numgroup1);
-    allocatePayloadReg(GBE_CURBE_GROUP_NUM_Z, ocl::numgroup2);
-    allocatePayloadReg(GBE_CURBE_STACK_POINTER, ocl::stackptr);
-    allocatePayloadReg(GBE_CURBE_THREAD_NUM, ocl::threadn);
+    this->allocatePayloadRegs();
  
      // Group and barrier IDs are always allocated by the hardware in r0
      RA.insert(std::make_pair(ocl::groupid0,  1*sizeof(float))); // r0.1
@@ -582,33 +582,6 @@ namespace gbe
      RA.insert(std::make_pair(ocl::barrierid, 2*sizeof(float))); // r0.2
  
      // block IP used to handle the mask in SW is always allocated
-    const int32_t blockIPOffset = GEN_REG_SIZE + kernel->getCurbeOffset(GBE_CURBE_BLOCK_IP,0);
-    GBE_ASSERT(blockIPOffset >= 0 && blockIPOffset % GEN_REG_SIZE == 0);
-    RA.insert(std::make_pair(ocl::blockip, blockIPOffset));
-    this->intervals[ocl::blockip].minID = 0;
-
-    // Allocate all (non-structure) argument parameters
-    const uint32_t argNum = fn.argNum();
-    for (uint32_t argID = 0; argID < argNum; ++argID) {
-      const FunctionArgument &arg = fn.getArg(argID);
-      GBE_ASSERT(arg.type == FunctionArgument::GLOBAL_POINTER ||
-                 arg.type == FunctionArgument::CONSTANT_POINTER ||
-                 arg.type == FunctionArgument::LOCAL_POINTER ||
-                 arg.type == FunctionArgument::VALUE ||
-                 arg.type == FunctionArgument::STRUCTURE ||
-                 arg.type == FunctionArgument::IMAGE ||
-                 arg.type == FunctionArgument::SAMPLER);
-      allocatePayloadReg(GBE_CURBE_KERNEL_ARGUMENT, arg.reg, argID);
-    }
-
-    // Allocate all pushed registers (i.e. structure kernel arguments)
-    const Function::PushMap &pushMap = fn.getPushMap();
-    for (const auto &pushed : pushMap) {
-      const uint32_t argID = pushed.second.argID;
-      const uint32_t subOffset = pushed.second.offset;
-      const Register reg = pushed.second.getRegister();
-      allocatePayloadReg(GBE_CURBE_KERNEL_ARGUMENT, reg, argID, subOffset);
-    }
  
      // Compute the intervals
      int32_t insnID = 0;
author	Zhigang Gong <zhigang.gong@linux.intel.com>
	Wed, 25 Sep 2013 03:20:45 +0000 (11:20 +0800)
committer	Zhigang Gong <zhigang.gong@linux.intel.com>
	Thu, 26 Sep 2013 08:55:52 +0000 (16:55 +0800)
backend/src/backend/context.cpp		patch \| blob \| history
backend/src/backend/context.hpp		patch \| blob \| history
backend/src/backend/gen_reg_allocation.cpp		patch \| blob \| history