From: Zhigang Gong <zhigang.gong@intel.com>
Date: Wed, 12 Mar 2014 09:08:15 +0000 (+0800)
Subject: GBE: use a uniform style to calculate register size for curbe allocation.
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=85d2420f8c89e1c5157c57630990a14ebae31a5d;p=contrib%2Fbeignet.git

GBE: use a uniform style to calculate register size for curbe allocation.

Concentrate the register allocation in one place, and don't
use hard-coded sizes when doing curbe register allocation. All
register size calculations should use the same method.

Signed-off-by: Zhigang Gong <zhigang.gong@intel.com>
Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
Reviewed-by: "Song, Ruiling" <ruiling.song@intel.com>
---

diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index b8f4171..dc27d83 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -353,7 +353,6 @@ namespace gbe
   Kernel *Context::compileKernel(void) {
     this->kernel = this->allocateKernel();
     this->kernel->simdWidth = this->simdWidth;
-    this->buildPatchList();
     this->buildArgList();
     this->buildUsedLabels();
     this->buildJIPs();
@@ -417,7 +416,7 @@ namespace gbe
     if (stackUse.size() == 0)  // no stack is used if stackptr is unused
       return;
     // Be sure that the stack pointer is set
-    GBE_ASSERT(this->kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) >= 0);
+    // GBE_ASSERT(this->kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) >= 0);
     uint32_t stackSize = 1*KB;
     while (stackSize < fn.getStackSize()) {
       stackSize <<= 1;
@@ -453,100 +452,10 @@ namespace gbe
     return offset + GEN_REG_SIZE;
   }
 
-
   void Context::insertCurbeReg(ir::Register reg, uint32_t offset) {
     curbeRegs.insert(std::make_pair(reg, offset));
   }
 
-  void Context::buildPatchList(void) {
-    const uint32_t ptrSize = unit.getPointerSize() == ir::POINTER_32_BITS ? 4u : 8u;
-    kernel->curbeSize = 0u;
-
-    // We insert the block IP mask first
-    this->insertCurbeReg(ir::ocl::blockip, this->newCurbeEntry(GBE_CURBE_BLOCK_IP, 0, this->simdWidth*sizeof(uint16_t)));
-    this->insertCurbeReg(ir::ocl::emask, this->newCurbeEntry(GBE_CURBE_EMASK, 0,  sizeof(uint16_t)));
-    this->insertCurbeReg(ir::ocl::notemask, this->newCurbeEntry(GBE_CURBE_NOT_EMASK, 0, sizeof(uint16_t)));
-    this->insertCurbeReg(ir::ocl::barriermask, this->newCurbeEntry(GBE_CURBE_BARRIER_MASK, 0, sizeof(uint16_t)));
-
-    // Go over the arguments and find the related patch locations
-    const uint32_t argNum = fn.argNum();
-    for (uint32_t argID = 0u; argID < argNum; ++argID) {
-      const ir::FunctionArgument &arg = fn.getArg(argID);
-      // For pointers and values, we have nothing to do. We just push the values
-      if (arg.type == ir::FunctionArgument::GLOBAL_POINTER ||
-          arg.type == ir::FunctionArgument::LOCAL_POINTER ||
-          arg.type == ir::FunctionArgument::CONSTANT_POINTER ||
-          arg.type == ir::FunctionArgument::VALUE ||
-          arg.type == ir::FunctionArgument::STRUCTURE ||
-          arg.type == ir::FunctionArgument::IMAGE ||
-          arg.type == ir::FunctionArgument::SAMPLER)
-        this->insertCurbeReg(arg.reg, this->newCurbeEntry(GBE_CURBE_KERNEL_ARGUMENT, argID, arg.size, ptrSize));
-    }
-
-    // Already inserted registers go here
-    const size_t localIDSize = sizeof(uint32_t) * this->simdWidth;
-    insertCurbeReg(ir::ocl::lid0, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_X, 0, localIDSize));
-    insertCurbeReg(ir::ocl::lid1, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Y, 0, localIDSize));
-    insertCurbeReg(ir::ocl::lid2, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Z, 0, localIDSize));
-
-    // Go over all the instructions and find the special register we need
-    // to push
-#define INSERT_REG(SPECIAL_REG, PATCH, WIDTH) \
-  if (reg == ir::ocl::SPECIAL_REG) { \
-    if (curbeRegs.find(reg) != curbeRegs.end()) continue; \
-    insertCurbeReg(reg, this->newCurbeEntry(GBE_CURBE_##PATCH, 0, ptrSize * WIDTH)); \
-  } else
-
-    bool useStackPtr = false;
-    fn.foreachInstruction([&](ir::Instruction &insn) {
-      const uint32_t srcNum = insn.getSrcNum();
-      for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
-        const ir::Register reg = insn.getSrc(srcID);
-        if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) {
-          if (srcID != 0) continue;
-          const unsigned char bti = ir::cast<ir::GetImageInfoInstruction>(insn).getImageIndex();
-          const unsigned char type =  ir::cast<ir::GetImageInfoInstruction>(insn).getInfoType();;
-          ir::ImageInfoKey key(bti, type);
-          const ir::Register imageInfo = insn.getSrc(0);
-          if (curbeRegs.find(imageInfo) == curbeRegs.end()) {
-            uint32_t offset = this->getImageInfoCurbeOffset(key, 4);
-            insertCurbeReg(imageInfo, offset);
-          }
-          continue;
-        }
-        if (fn.isSpecialReg(reg) == false) continue;
-        if (curbeRegs.find(reg) != curbeRegs.end()) continue;
-        if (reg == ir::ocl::stackptr) useStackPtr = true;
-        INSERT_REG(lsize0, LOCAL_SIZE_X, 1)
-        INSERT_REG(lsize1, LOCAL_SIZE_Y, 1)
-        INSERT_REG(lsize2, LOCAL_SIZE_Z, 1)
-        INSERT_REG(gsize0, GLOBAL_SIZE_X, 1)
-        INSERT_REG(gsize1, GLOBAL_SIZE_Y, 1)
-        INSERT_REG(gsize2, GLOBAL_SIZE_Z, 1)
-        INSERT_REG(goffset0, GLOBAL_OFFSET_X, 1)
-        INSERT_REG(goffset1, GLOBAL_OFFSET_Y, 1)
-        INSERT_REG(goffset2, GLOBAL_OFFSET_Z, 1)
-        INSERT_REG(workdim, WORK_DIM, 1)
-        INSERT_REG(numgroup0, GROUP_NUM_X, 1)
-        INSERT_REG(numgroup1, GROUP_NUM_Y, 1)
-        INSERT_REG(numgroup2, GROUP_NUM_Z, 1)
-        INSERT_REG(stackptr, STACK_POINTER, this->simdWidth)
-        do {} while(0);
-      }
-    });
-#undef INSERT_REG
-
-    // Insert the stack buffer if used
-    if (useStackPtr)
-      insertCurbeReg(ir::ocl::stackbuffer, this->newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER, ptrSize));
-
-    // After this point the vector is immutable. Sorting it will make
-    // research faster
-    std::sort(kernel->patches.begin(), kernel->patches.end());
-
-    kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE);
-  }
-
   void Context::buildArgList(void) {
     kernel->argNum = fn.argNum();
     if (kernel->argNum)
diff --git a/backend/src/backend/context.hpp b/backend/src/backend/context.hpp
index 384a2fb..26167a0 100644
--- a/backend/src/backend/context.hpp
+++ b/backend/src/backend/context.hpp
@@ -105,8 +105,6 @@ namespace gbe
     virtual Kernel *allocateKernel(void) = 0;
     /*! Look if a stack is needed and allocate it */
     void buildStack(void);
-    /*! Build the curbe patch list for the given kernel */
-    void buildPatchList(void);
     /*! Build the list of arguments to set to launch the kernel */
     void buildArgList(void);
     /*! Build the sets of used labels */
@@ -121,6 +119,7 @@ namespace gbe
      *  of the entry
      */
     void insertCurbeReg(ir::Register, uint32_t grfOffset);
+    /*! allocate a curbe entry. */
     uint32_t newCurbeEntry(gbe_curbe_type value, uint32_t subValue, uint32_t size, uint32_t alignment = 0);
     /*! Provide for each branch and label the label index target */
     typedef map<const ir::Instruction*, ir::LabelIndex> JIPMap;
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 8bcf454..51c6c97 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -31,6 +31,7 @@
 #include "backend/gen_reg_allocation.hpp"
 #include "backend/gen/gen_mesa_disasm.h"
 #include "ir/function.hpp"
+#include "ir/value.hpp"
 #include "sys/cvar.hpp"
 #include <cstring>
 #include <iostream>
@@ -1860,8 +1861,116 @@ namespace gbe
 
   BVAR(OCL_OUTPUT_REG_ALLOC, false);
   BVAR(OCL_OUTPUT_ASM, false);
+
+  /*! Reserve a curbe entry sized by ra->getRegSize(reg) and record reg at the returned offset. */
+  void GenContext::allocCurbeReg(ir::Register reg, gbe_curbe_type value, uint32_t subValue) {
+    const uint32_t entrySize = this->ra->getRegSize(reg);
+    insertCurbeReg(reg, newCurbeEntry(value, subValue, entrySize));
+  }
+
+  /*! Build the curbe patch list: fixed special registers, kernel arguments, then special registers read by instructions. */
+  void GenContext::buildPatchList(void) {
+    const uint32_t ptrSize = unit.getPointerSize() == ir::POINTER_32_BITS ? 4u : 8u;
+    kernel->curbeSize = 0u;
+    auto &stackUse = dag->getUse(ir::ocl::stackptr);
+
+    // Fixed special registers go first; the disabled branch keeps the hard-coded sizes for reference
+#if 0
+    this->insertCurbeReg(ir::ocl::blockip, this->newCurbeEntry(GBE_CURBE_BLOCK_IP, 0, this->simdWidth * sizeof(uint16_t)));
+    this->insertCurbeReg(ir::ocl::emask, this->newCurbeEntry(GBE_CURBE_EMASK, 0,  this->simdWidth * sizeof(uint16_t)));
+    this->insertCurbeReg(ir::ocl::notemask, this->newCurbeEntry(GBE_CURBE_NOT_EMASK, 0, sizeof(uint16_t)));
+    this->insertCurbeReg(ir::ocl::barriermask, this->newCurbeEntry(GBE_CURBE_BARRIER_MASK, 0, sizeof(uint16_t)));
+    // Already inserted registers go here
+    const size_t localIDSize = sizeof(uint32_t) * this->simdWidth;
+    insertCurbeReg(ir::ocl::lid0, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_X, 0, localIDSize));
+    insertCurbeReg(ir::ocl::lid1, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Y, 0, localIDSize));
+    insertCurbeReg(ir::ocl::lid2, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Z, 0, localIDSize));
+    // Insert the stack buffer if used
+    if (stackUse.size() != 0)
+      insertCurbeReg(ir::ocl::stackbuffer, this->newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER, ptrSize));
+#else
+    using namespace ir::ocl;
+    allocCurbeReg(blockip, GBE_CURBE_BLOCK_IP);
+    allocCurbeReg(emask, GBE_CURBE_EMASK);
+    allocCurbeReg(notemask, GBE_CURBE_NOT_EMASK);
+    allocCurbeReg(barriermask, GBE_CURBE_BARRIER_MASK);
+    allocCurbeReg(lid0, GBE_CURBE_LOCAL_ID_X);
+    allocCurbeReg(lid1, GBE_CURBE_LOCAL_ID_Y);
+    allocCurbeReg(lid2, GBE_CURBE_LOCAL_ID_Z);
+    if (stackUse.size() != 0)
+      allocCurbeReg(stackbuffer, GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER);
+#endif
+    // Go over the arguments and find the related patch locations
+    const uint32_t argNum = fn.argNum();
+    for (uint32_t argID = 0u; argID < argNum; ++argID) {
+      const ir::FunctionArgument &arg = fn.getArg(argID);
+      // For pointers and values, we have nothing to do. We just push the values
+      if (arg.type == ir::FunctionArgument::GLOBAL_POINTER ||
+          arg.type == ir::FunctionArgument::LOCAL_POINTER ||
+          arg.type == ir::FunctionArgument::CONSTANT_POINTER ||
+          arg.type == ir::FunctionArgument::VALUE ||
+          arg.type == ir::FunctionArgument::STRUCTURE ||
+          arg.type == ir::FunctionArgument::IMAGE ||
+          arg.type == ir::FunctionArgument::SAMPLER)
+        this->insertCurbeReg(arg.reg, this->newCurbeEntry(GBE_CURBE_KERNEL_ARGUMENT, argID, arg.size, ptrSize));
+    }
+
+    // Go over all the instructions and find the special register we need
+    // to push
+#define INSERT_REG(SPECIAL_REG, PATCH) \
+    if (reg == ir::ocl::SPECIAL_REG) { \
+      if (curbeRegs.find(reg) != curbeRegs.end()) continue; \
+      allocCurbeReg(reg, GBE_CURBE_##PATCH); \
+    } else
+
+    fn.foreachInstruction([&](ir::Instruction &insn) {
+      const uint32_t srcNum = insn.getSrcNum();
+      for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
+        const ir::Register reg = insn.getSrc(srcID);
+        if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) {
+          if (srcID != 0) continue;
+          const unsigned char bti = ir::cast<ir::GetImageInfoInstruction>(insn).getImageIndex();
+          const unsigned char type =  ir::cast<ir::GetImageInfoInstruction>(insn).getInfoType();
+          ir::ImageInfoKey key(bti, type);
+          const ir::Register imageInfo = insn.getSrc(0);
+          if (curbeRegs.find(imageInfo) == curbeRegs.end()) {
+            uint32_t offset = this->getImageInfoCurbeOffset(key, 4);
+            insertCurbeReg(imageInfo, offset);
+          }
+          continue;
+        }
+        if (fn.isSpecialReg(reg) == false) continue;
+        if (curbeRegs.find(reg) != curbeRegs.end()) continue;
+        if (reg == ir::ocl::stackptr) GBE_ASSERT(stackUse.size() > 0);
+        INSERT_REG(lsize0, LOCAL_SIZE_X)
+        INSERT_REG(lsize1, LOCAL_SIZE_Y)
+        INSERT_REG(lsize2, LOCAL_SIZE_Z)
+        INSERT_REG(gsize0, GLOBAL_SIZE_X)
+        INSERT_REG(gsize1, GLOBAL_SIZE_Y)
+        INSERT_REG(gsize2, GLOBAL_SIZE_Z)
+        INSERT_REG(goffset0, GLOBAL_OFFSET_X)
+        INSERT_REG(goffset1, GLOBAL_OFFSET_Y)
+        INSERT_REG(goffset2, GLOBAL_OFFSET_Z)
+        INSERT_REG(workdim, WORK_DIM)
+        INSERT_REG(numgroup0, GROUP_NUM_X)
+        INSERT_REG(numgroup1, GROUP_NUM_Y)
+        INSERT_REG(numgroup2, GROUP_NUM_Z)
+        INSERT_REG(stackptr, STACK_POINTER)
+        do {} while(0);
+      }
+    });
+#undef INSERT_REG
+
+    // After this point the vector is immutable. Sorting it will make
+    // lookups faster
+    std::sort(kernel->patches.begin(), kernel->patches.end());
+
+    kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE);
+  }
+
   bool GenContext::emitCode(void) {
     GenKernel *genKernel = static_cast<GenKernel*>(this->kernel);
+    buildPatchList();
     sel->select();
     schedulePreRegAllocation(*this, *this->sel);
     if (UNLIKELY(ra->allocate(*this->sel) == false))
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 642301c..6ec43cc 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -162,6 +162,12 @@ namespace gbe
      * regenerating the code
      */
     bool limitRegisterPressure;
+  private:
+    /*! Build the curbe patch list for the given kernel */
+    void buildPatchList(void);
+    /*! allocate a new curbe register and insert to curbe pool. */
+    void allocCurbeReg(ir::Register reg, gbe_curbe_type value, uint32_t subValue = 0);
+
   };
 
 } /* namespace gbe */
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index f446a5b..2ba9495 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -1001,5 +1001,11 @@ namespace gbe
     this->opaque->outputAllocation();
   }
 
+  uint32_t GenRegAllocator::getRegSize(ir::Register reg) {
+    uint32_t regSize = 0u; // defined result even if getRegAttrib leaves it untouched
+    this->opaque->getRegAttrib(reg, regSize);
+    return regSize;
+  }
+
 } /* namespace gbe */
 
diff --git a/backend/src/backend/gen_reg_allocation.hpp b/backend/src/backend/gen_reg_allocation.hpp
index bccccc8..a2a1d40 100644
--- a/backend/src/backend/gen_reg_allocation.hpp
+++ b/backend/src/backend/gen_reg_allocation.hpp
@@ -57,6 +57,8 @@ namespace gbe
     GenRegister genReg(const GenRegister &reg);
     /*! Output the register allocation */
     void outputAllocation(void);
+    /*! Get register actual size in byte. */
+    uint32_t getRegSize(ir::Register reg);
   private:
     /*! Actual implementation of the register allocator (use Pimpl) */
     class Opaque;
diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp
index e6fc411..83fb0b4 100644
--- a/backend/src/backend/program.hpp
+++ b/backend/src/backend/program.hpp
@@ -180,6 +180,7 @@ namespace gbe {
 
   protected:
     friend class Context;      //!< Owns the kernels
+    friend class GenContext;
     std::string name;    //!< Kernel name
     KernelArgument *args;      //!< Each argument
     vector<PatchInfo> patches; //!< Indicates how to build the curbe