GBE: use a uniform style to calculate register size for curbe allocation.

author Zhigang Gong <zhigang.gong@intel.com>

Wed, 12 Mar 2014 09:08:15 +0000 (17:08 +0800)

committer Zhigang Gong <zhigang.gong@intel.com>

Tue, 8 Apr 2014 08:21:32 +0000 (16:21 +0800)
author Zhigang Gong <zhigang.gong@intel.com>
Wed, 12 Mar 2014 09:08:15 +0000 (17:08 +0800)
committer Zhigang Gong <zhigang.gong@intel.com>
Tue, 8 Apr 2014 08:21:32 +0000 (16:21 +0800)
diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp

index b8f4171..dc27d83 100644 (file)
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -353,7 +353,6 @@ namespace gbe
    Kernel *Context::compileKernel(void) {
      this->kernel = this->allocateKernel();
      this->kernel->simdWidth = this->simdWidth;
-    this->buildPatchList();
      this->buildArgList();
      this->buildUsedLabels();
      this->buildJIPs();
@@ -417,7 +416,7 @@ namespace gbe
      if (stackUse.size() == 0)  // no stack is used if stackptr is unused
        return;
      // Be sure that the stack pointer is set
-    GBE_ASSERT(this->kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) >= 0);
+    // GBE_ASSERT(this->kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) >= 0);
      uint32_t stackSize = 1*KB;
      while (stackSize < fn.getStackSize()) {
        stackSize <<= 1;
@@ -453,100 +452,10 @@ namespace gbe
      return offset + GEN_REG_SIZE;
    }
  
-
    void Context::insertCurbeReg(ir::Register reg, uint32_t offset) {
      curbeRegs.insert(std::make_pair(reg, offset));
    }
  
-  void Context::buildPatchList(void) {
-    const uint32_t ptrSize = unit.getPointerSize() == ir::POINTER_32_BITS ? 4u : 8u;
-    kernel->curbeSize = 0u;
-
-    // We insert the block IP mask first
-    this->insertCurbeReg(ir::ocl::blockip, this->newCurbeEntry(GBE_CURBE_BLOCK_IP, 0, this->simdWidth*sizeof(uint16_t)));
-    this->insertCurbeReg(ir::ocl::emask, this->newCurbeEntry(GBE_CURBE_EMASK, 0,  sizeof(uint16_t)));
-    this->insertCurbeReg(ir::ocl::notemask, this->newCurbeEntry(GBE_CURBE_NOT_EMASK, 0, sizeof(uint16_t)));
-    this->insertCurbeReg(ir::ocl::barriermask, this->newCurbeEntry(GBE_CURBE_BARRIER_MASK, 0, sizeof(uint16_t)));
-
-    // Go over the arguments and find the related patch locations
-    const uint32_t argNum = fn.argNum();
-    for (uint32_t argID = 0u; argID < argNum; ++argID) {
-      const ir::FunctionArgument &arg = fn.getArg(argID);
-      // For pointers and values, we have nothing to do. We just push the values
-      if (arg.type == ir::FunctionArgument::GLOBAL_POINTER ||
-          arg.type == ir::FunctionArgument::LOCAL_POINTER ||
-          arg.type == ir::FunctionArgument::CONSTANT_POINTER ||
-          arg.type == ir::FunctionArgument::VALUE ||
-          arg.type == ir::FunctionArgument::STRUCTURE ||
-          arg.type == ir::FunctionArgument::IMAGE ||
-          arg.type == ir::FunctionArgument::SAMPLER)
-        this->insertCurbeReg(arg.reg, this->newCurbeEntry(GBE_CURBE_KERNEL_ARGUMENT, argID, arg.size, ptrSize));
-    }
-
-    // Already inserted registers go here
-    const size_t localIDSize = sizeof(uint32_t) * this->simdWidth;
-    insertCurbeReg(ir::ocl::lid0, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_X, 0, localIDSize));
-    insertCurbeReg(ir::ocl::lid1, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Y, 0, localIDSize));
-    insertCurbeReg(ir::ocl::lid2, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Z, 0, localIDSize));
-
-    // Go over all the instructions and find the special register we need
-    // to push
-#define INSERT_REG(SPECIAL_REG, PATCH, WIDTH) \
-  if (reg == ir::ocl::SPECIAL_REG) { \
-    if (curbeRegs.find(reg) != curbeRegs.end()) continue; \
-    insertCurbeReg(reg, this->newCurbeEntry(GBE_CURBE_##PATCH, 0, ptrSize * WIDTH)); \
-  } else
-
-    bool useStackPtr = false;
-    fn.foreachInstruction([&](ir::Instruction &insn) {
-      const uint32_t srcNum = insn.getSrcNum();
-      for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
-        const ir::Register reg = insn.getSrc(srcID);
-        if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) {
-          if (srcID != 0) continue;
-          const unsigned char bti = ir::cast<ir::GetImageInfoInstruction>(insn).getImageIndex();
-          const unsigned char type =  ir::cast<ir::GetImageInfoInstruction>(insn).getInfoType();;
-          ir::ImageInfoKey key(bti, type);
-          const ir::Register imageInfo = insn.getSrc(0);
-          if (curbeRegs.find(imageInfo) == curbeRegs.end()) {
-            uint32_t offset = this->getImageInfoCurbeOffset(key, 4);
-            insertCurbeReg(imageInfo, offset);
-          }
-          continue;
-        }
-        if (fn.isSpecialReg(reg) == false) continue;
-        if (curbeRegs.find(reg) != curbeRegs.end()) continue;
-        if (reg == ir::ocl::stackptr) useStackPtr = true;
-        INSERT_REG(lsize0, LOCAL_SIZE_X, 1)
-        INSERT_REG(lsize1, LOCAL_SIZE_Y, 1)
-        INSERT_REG(lsize2, LOCAL_SIZE_Z, 1)
-        INSERT_REG(gsize0, GLOBAL_SIZE_X, 1)
-        INSERT_REG(gsize1, GLOBAL_SIZE_Y, 1)
-        INSERT_REG(gsize2, GLOBAL_SIZE_Z, 1)
-        INSERT_REG(goffset0, GLOBAL_OFFSET_X, 1)
-        INSERT_REG(goffset1, GLOBAL_OFFSET_Y, 1)
-        INSERT_REG(goffset2, GLOBAL_OFFSET_Z, 1)
-        INSERT_REG(workdim, WORK_DIM, 1)
-        INSERT_REG(numgroup0, GROUP_NUM_X, 1)
-        INSERT_REG(numgroup1, GROUP_NUM_Y, 1)
-        INSERT_REG(numgroup2, GROUP_NUM_Z, 1)
-        INSERT_REG(stackptr, STACK_POINTER, this->simdWidth)
-        do {} while(0);
-      }
-    });
-#undef INSERT_REG
-
-    // Insert the stack buffer if used
-    if (useStackPtr)
-      insertCurbeReg(ir::ocl::stackbuffer, this->newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER, ptrSize));
-
-    // After this point the vector is immutable. Sorting it will make
-    // research faster
-    std::sort(kernel->patches.begin(), kernel->patches.end());
-
-    kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE);
-  }
-
    void Context::buildArgList(void) {
      kernel->argNum = fn.argNum();
      if (kernel->argNum)
diff --git a/backend/src/backend/context.hpp b/backend/src/backend/context.hpp

index 384a2fb..26167a0 100644 (file)
--- a/backend/src/backend/context.hpp
+++ b/backend/src/backend/context.hpp
@@ -105,8 +105,6 @@ namespace gbe
      virtual Kernel *allocateKernel(void) = 0;
      /*! Look if a stack is needed and allocate it */
      void buildStack(void);
-    /*! Build the curbe patch list for the given kernel */
-    void buildPatchList(void);
      /*! Build the list of arguments to set to launch the kernel */
      void buildArgList(void);
      /*! Build the sets of used labels */
@@ -121,6 +119,7 @@ namespace gbe
       *  of the entry
       */
      void insertCurbeReg(ir::Register, uint32_t grfOffset);
+    /*! allocate a curbe entry. */
      uint32_t newCurbeEntry(gbe_curbe_type value, uint32_t subValue, uint32_t size, uint32_t alignment = 0);
      /*! Provide for each branch and label the label index target */
      typedef map<const ir::Instruction*, ir::LabelIndex> JIPMap;
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp

index 8bcf454..51c6c97 100644 (file)
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -31,6 +31,7 @@
  #include "backend/gen_reg_allocation.hpp"
  #include "backend/gen/gen_mesa_disasm.h"
  #include "ir/function.hpp"
+#include "ir/value.hpp"
  #include "sys/cvar.hpp"
  #include <cstring>
  #include <iostream>
@@ -1860,8 +1861,116 @@ namespace gbe
  
    BVAR(OCL_OUTPUT_REG_ALLOC, false);
    BVAR(OCL_OUTPUT_ASM, false);
+
+  void GenContext::allocCurbeReg(ir::Register reg, gbe_curbe_type value, uint32_t subValue) {
+    // Size the new curbe entry with ra->getRegSize(reg), then record reg at the returned offset.
+    const uint32_t entryOffset = newCurbeEntry(value, subValue, this->ra->getRegSize(reg));
+    insertCurbeReg(reg, entryOffset);
+  }
+
+  void GenContext::buildPatchList(void) {
+    // Resets curbeSize, allocates curbe entries for the block IP, masks, local IDs
+    // (plus the stack buffer when stackptr has uses), the kernel arguments and the
+    // special registers read by instructions, then sorts patches and aligns curbeSize.
+    const uint32_t ptrSize = unit.getPointerSize() == ir::POINTER_32_BITS ? 4u : 8u;
+    kernel->curbeSize = 0u;
+    auto &stackUse = dag->getUse(ir::ocl::stackptr);
+
+    // We insert the block IP mask first. Only the #else branch below is compiled.
+#if 0
+    this->insertCurbeReg(ir::ocl::blockip, this->newCurbeEntry(GBE_CURBE_BLOCK_IP, 0, this->simdWidth * sizeof(uint16_t)));
+    this->insertCurbeReg(ir::ocl::emask, this->newCurbeEntry(GBE_CURBE_EMASK, 0,  this->simdWidth * sizeof(uint16_t)));
+    this->insertCurbeReg(ir::ocl::notemask, this->newCurbeEntry(GBE_CURBE_NOT_EMASK, 0, sizeof(uint16_t)));
+    this->insertCurbeReg(ir::ocl::barriermask, this->newCurbeEntry(GBE_CURBE_BARRIER_MASK, 0, sizeof(uint16_t)));
+    // Already inserted registers go here
+    const size_t localIDSize = sizeof(uint32_t) * this->simdWidth;
+    insertCurbeReg(ir::ocl::lid0, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_X, 0, localIDSize));
+    insertCurbeReg(ir::ocl::lid1, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Y, 0, localIDSize));
+    insertCurbeReg(ir::ocl::lid2, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Z, 0, localIDSize));
+    // Insert the stack buffer if used
+    if (stackUse.size() != 0)
+      insertCurbeReg(ir::ocl::stackbuffer, this->newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER, ptrSize));
+#else
+    using namespace ir::ocl;
+    allocCurbeReg(blockip, GBE_CURBE_BLOCK_IP);
+    allocCurbeReg(emask, GBE_CURBE_EMASK);
+    allocCurbeReg(notemask, GBE_CURBE_NOT_EMASK);
+    allocCurbeReg(barriermask, GBE_CURBE_BARRIER_MASK);
+    allocCurbeReg(lid0, GBE_CURBE_LOCAL_ID_X);
+    allocCurbeReg(lid1, GBE_CURBE_LOCAL_ID_Y);
+    allocCurbeReg(lid2, GBE_CURBE_LOCAL_ID_Z);
+    if (stackUse.size() != 0)
+      allocCurbeReg(stackbuffer, GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER);
+#endif
+    // Go over the arguments and find the related patch locations
+    const uint32_t argNum = fn.argNum();
+    for (uint32_t argID = 0u; argID < argNum; ++argID) {
+      const ir::FunctionArgument &arg = fn.getArg(argID);
+      // For pointers and values, we have nothing to do. We just push the values
+      if (arg.type == ir::FunctionArgument::GLOBAL_POINTER ||
+          arg.type == ir::FunctionArgument::LOCAL_POINTER ||
+          arg.type == ir::FunctionArgument::CONSTANT_POINTER ||
+          arg.type == ir::FunctionArgument::VALUE ||
+          arg.type == ir::FunctionArgument::STRUCTURE ||
+          arg.type == ir::FunctionArgument::IMAGE ||
+          arg.type == ir::FunctionArgument::SAMPLER)
+        this->insertCurbeReg(arg.reg, this->newCurbeEntry(GBE_CURBE_KERNEL_ARGUMENT, argID, arg.size, ptrSize));
+    }
+
+    // Go over all the instructions and push the special registers they read.
+    // INSERT_REG handles one register; its trailing `else` chains the next one.
+    #define INSERT_REG(SPECIAL_REG, PATCH) \
+    if (reg == ir::ocl::SPECIAL_REG) { \
+      if (curbeRegs.find(reg) != curbeRegs.end()) continue; \
+      allocCurbeReg(reg, GBE_CURBE_##PATCH); \
+    } else
+    fn.foreachInstruction([&](ir::Instruction &insn) {
+      const uint32_t srcNum = insn.getSrcNum();
+      for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
+        const ir::Register reg = insn.getSrc(srcID);
+        if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) {
+          if (srcID != 0) continue;
+          const unsigned char bti = ir::cast<ir::GetImageInfoInstruction>(insn).getImageIndex();
+          const unsigned char type =  ir::cast<ir::GetImageInfoInstruction>(insn).getInfoType();
+          ir::ImageInfoKey key(bti, type);
+          const ir::Register imageInfo = insn.getSrc(0);
+          if (curbeRegs.find(imageInfo) == curbeRegs.end()) {
+            uint32_t offset = this->getImageInfoCurbeOffset(key, 4);
+            insertCurbeReg(imageInfo, offset);
+          }
+          continue;
+        }
+        if (fn.isSpecialReg(reg) == false) continue;
+        if (curbeRegs.find(reg) != curbeRegs.end()) continue;
+        if (reg == ir::ocl::stackptr) GBE_ASSERT(stackUse.size() > 0);
+        INSERT_REG(lsize0, LOCAL_SIZE_X)
+        INSERT_REG(lsize1, LOCAL_SIZE_Y)
+        INSERT_REG(lsize2, LOCAL_SIZE_Z)
+        INSERT_REG(gsize0, GLOBAL_SIZE_X)
+        INSERT_REG(gsize1, GLOBAL_SIZE_Y)
+        INSERT_REG(gsize2, GLOBAL_SIZE_Z)
+        INSERT_REG(goffset0, GLOBAL_OFFSET_X)
+        INSERT_REG(goffset1, GLOBAL_OFFSET_Y)
+        INSERT_REG(goffset2, GLOBAL_OFFSET_Z)
+        INSERT_REG(workdim, WORK_DIM)
+        INSERT_REG(numgroup0, GROUP_NUM_X)
+        INSERT_REG(numgroup1, GROUP_NUM_Y)
+        INSERT_REG(numgroup2, GROUP_NUM_Z)
+        INSERT_REG(stackptr, STACK_POINTER)
+        do {} while(0);
+      }
+    });
+#undef INSERT_REG
+
+    // After this point the vector is immutable. Sorting it makes lookups faster.
+    std::sort(kernel->patches.begin(), kernel->patches.end());
+
+    kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE);
+  }
+
    bool GenContext::emitCode(void) {
      GenKernel *genKernel = static_cast<GenKernel*>(this->kernel);
+    buildPatchList();
      sel->select();
      schedulePreRegAllocation(*this, *this->sel);
      if (UNLIKELY(ra->allocate(*this->sel) == false))
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp

index 642301c..6ec43cc 100644 (file)
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -162,6 +162,12 @@ namespace gbe
       * regenerating the code
       */
      bool limitRegisterPressure;
+  private:
+    /*! Build the curbe patch list for the given kernel */
+    void buildPatchList(void);
+    /*! Allocate a new curbe register and insert it into the curbe pool. */
+    void allocCurbeReg(ir::Register reg, gbe_curbe_type value, uint32_t subValue = 0);
+
    };
  
  } /* namespace gbe */
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp

index f446a5b..2ba9495 100644 (file)
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -1001,5 +1001,11 @@ namespace gbe
      this->opaque->outputAllocation();
    }
  
+  uint32_t GenRegAllocator::getRegSize(ir::Register reg) {
+    uint32_t sizeInBytes; // presumably filled in by getRegAttrib() below -- TODO confirm
+    opaque->getRegAttrib(reg, sizeInBytes);
+    return sizeInBytes;
+  }
+
  } /* namespace gbe */
  
diff --git a/backend/src/backend/gen_reg_allocation.hpp b/backend/src/backend/gen_reg_allocation.hpp

index bccccc8..a2a1d40 100644 (file)
--- a/backend/src/backend/gen_reg_allocation.hpp
+++ b/backend/src/backend/gen_reg_allocation.hpp
@@ -57,6 +57,8 @@ namespace gbe
      GenRegister genReg(const GenRegister &reg);
      /*! Output the register allocation */
      void outputAllocation(void);
+    /*! Get the actual size of the register in bytes. */
+    uint32_t getRegSize(ir::Register reg);
    private:
      /*! Actual implementation of the register allocator (use Pimpl) */
      class Opaque;
diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp

index e6fc411..83fb0b4 100644 (file)
--- a/backend/src/backend/program.hpp
+++ b/backend/src/backend/program.hpp
@@ -180,6 +180,7 @@ namespace gbe {
  
    protected:
      friend class Context;      //!< Owns the kernels
+    friend class GenContext;
      std::string name;    //!< Kernel name
      KernelArgument *args;      //!< Each argument
      vector<PatchInfo> patches; //!< Indicates how to build the curbe
author	Zhigang Gong <zhigang.gong@intel.com>
	Wed, 12 Mar 2014 09:08:15 +0000 (17:08 +0800)
committer	Zhigang Gong <zhigang.gong@intel.com>
	Tue, 8 Apr 2014 08:21:32 +0000 (16:21 +0800)
backend/src/backend/context.cpp		patch \| blob \| history
backend/src/backend/context.hpp		patch \| blob \| history
backend/src/backend/gen_context.cpp		patch \| blob \| history
backend/src/backend/gen_context.hpp		patch \| blob \| history
backend/src/backend/gen_reg_allocation.cpp		patch \| blob \| history
backend/src/backend/gen_reg_allocation.hpp		patch \| blob \| history
backend/src/backend/program.hpp		patch \| blob \| history