Simplify the way we handle curbe

author Benjamin Segovia <segovia.benjamin@gmail.com>

Wed, 25 Apr 2012 20:30:48 +0000 (20:30 +0000)

committer Keith Packard <keithp@keithp.com>

Fri, 10 Aug 2012 23:16:44 +0000 (16:16 -0700)
author Benjamin Segovia <segovia.benjamin@gmail.com>
Wed, 25 Apr 2012 20:30:48 +0000 (20:30 +0000)
committer Keith Packard <keithp@keithp.com>
Fri, 10 Aug 2012 23:16:44 +0000 (16:16 -0700)
diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp

index 5fa0260..85f3e92 100644 (file)
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -34,7 +34,6 @@
  
  namespace gbe
  {
-
    IVAR(OCL_SIMD_WIDTH, 8, 16, 32);
  
    Context::Context(const ir::Unit &unit, const std::string &name) :
@@ -76,18 +75,42 @@ namespace gbe
        }
      }
  
+    // Already inserted registers go here
+    set<ir::Register> specialRegs;
+
+    // We insert the block IP mask first
+    kernel->patches.push_back(PatchInfo(GBE_CURBE_BLOCK_IP, 0, kernel->curbeSize));
+    kernel->curbeSize += this->simdWidth * sizeof(uint16_t);
+
+    // Then the local IDs (not scalar, so we align them properly)
+    kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE);
+    if (this->simdWidth == 16 || this->simdWidth == 32)
+      if ((kernel->curbeSize + GEN_REG_SIZE) % (2*GEN_REG_SIZE) != 0)
+        kernel->curbeSize += GEN_REG_SIZE;
+    const size_t localIDSize = sizeof(uint32_t) * this->simdWidth;
+    const PatchInfo lid0(GBE_CURBE_LOCAL_ID_X, 0, kernel->curbeSize);
+    kernel->curbeSize += localIDSize;
+    const PatchInfo lid1(GBE_CURBE_LOCAL_ID_Y, 0, kernel->curbeSize);
+    kernel->curbeSize += localIDSize;
+    const PatchInfo lid2(GBE_CURBE_LOCAL_ID_Z, 0, kernel->curbeSize);
+    kernel->curbeSize += localIDSize;
+    kernel->patches.push_back(lid0);
+    kernel->patches.push_back(lid1);
+    kernel->patches.push_back(lid2);
+    specialRegs.insert(ir::ocl::lid0);
+    specialRegs.insert(ir::ocl::lid1);
+    specialRegs.insert(ir::ocl::lid2);
+
      // Go over all the instructions and find the special register value we need
      // to push
-#define INSERT_REG(SPECIAL_REG, PATCH)                              \
-  if (reg == ir::ocl::SPECIAL_REG) {                                \
-    if (specialRegs.find(reg) != specialRegs.end()) continue;       \
+#define INSERT_REG(SPECIAL_REG, PATCH) \
+  if (reg == ir::ocl::SPECIAL_REG) { \
+    if (specialRegs.find(reg) != specialRegs.end()) continue; \
      const PatchInfo patch(GBE_CURBE_##PATCH, 0, kernel->curbeSize); \
-    kernel->patches.push_back(patch);                               \
-    kernel->curbeSize += ptrSize;                                   \
+    kernel->patches.push_back(patch); \
+    kernel->curbeSize += ptrSize; \
    } else
-    set<ir::Register> specialRegs; // already inserted registers
      fn.foreachInstruction([&](const ir::Instruction &insn) {
-      // Special registers are immutable. So only check sources
        const uint32_t srcNum = insn.getSrcNum();
        for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
          const ir::Register reg = insn.getSrc(srcID);
@@ -108,24 +131,14 @@ namespace gbe
          specialRegs.insert(reg);
        }
      });
-
-    kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE);
-    if (this->simdWidth == 16)
-      if ((kernel->curbeSize + GEN_REG_SIZE) % (2*GEN_REG_SIZE) != 0)
-        kernel->curbeSize += GEN_REG_SIZE;
-
-    // Local IDs always go at the end of the curbe
-    const size_t localIDSize = sizeof(uint32_t) * this->simdWidth;
-    const PatchInfo lid0(GBE_CURBE_LOCAL_ID_X, 0, kernel->curbeSize+0*localIDSize);
-    const PatchInfo lid1(GBE_CURBE_LOCAL_ID_Y, 0, kernel->curbeSize+1*localIDSize);
-    const PatchInfo lid2(GBE_CURBE_LOCAL_ID_Z, 0, kernel->curbeSize+2*localIDSize);
-    kernel->patches.push_back(lid0);
-    kernel->patches.push_back(lid1);
-    kernel->patches.push_back(lid2);
+#undef INSERT_REG
  
      // After this point the vector is immutable. Sorting it will make
      // research faster
      std::sort(kernel->patches.begin(), kernel->patches.end());
+
+    // Align the final curbe size on GEN_REG_SIZE
+    kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE);
    }
  
    void Context::buildArgList(void) {
@@ -171,8 +184,7 @@ namespace gbe
  
    bool Context::isScalarReg(const ir::Register &reg) const {
      GBE_ASSERT(fn.getProfile() == ir::Profile::PROFILE_OCL);
-    if (fn.getInput(reg) != NULL)
-      return true;
+    if (fn.getInput(reg) != NULL) return true;
      if (reg == ir::ocl::groupid0  ||
          reg == ir::ocl::groupid1  ||
          reg == ir::ocl::groupid2  ||
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp

index 24d562d..93ccd32 100644 (file)
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -131,7 +131,6 @@ namespace gbe
    void GenContext::emitUnaryInstruction(const ir::UnaryInstruction &insn) {
      GBE_ASSERT(insn.getOpcode() == ir::OP_MOV);
      p->MOV(reg(insn.getDst(0)), reg(insn.getSrc(0)));
-    //p->MOV(GenReg::retype(reg(insn.getDst(0)), GEN_TYPE_UD), GenReg::retype(reg(insn.getSrc(0)), GEN_TYPE_UD));
    }
  
    void GenContext::emitBinaryInstruction(const ir::BinaryInstruction &insn) {
@@ -243,7 +242,6 @@ namespace gbe
      const GenReg value = reg(insn.getValue(0));
      // XXX remove that later. Now we just copy everything to GRFs to make it
      // contiguous
-#if 1
      if (this->simdWidth == 8) {
        p->MOV(GenReg::vec8grf(112, 0), GenReg::retype(address, GEN_TYPE_F));
        p->MOV(GenReg::vec8grf(113, 0), GenReg::retype(value, GEN_TYPE_F));
@@ -254,19 +252,6 @@ namespace gbe
        p->UNTYPED_WRITE(GenReg::vec16grf(112, 0), 0, 1);
      } else
        NOT_IMPLEMENTED;
-#else
-    if (this->simdWidth == 8) {
-      p->MOV(GenReg::ud8grf(112, 0), GenReg::retype(address, GEN_TYPE_UD));
-      p->MOV(GenReg::ud8grf(113, 0), GenReg::retype(value, GEN_TYPE_UD));
-      p->UNTYPED_WRITE(GenReg::vec8grf(112, 0), 0, 1);
-    } else if (this->simdWidth == 16) {
-      p->MOV(GenReg::ud16grf(112, 0), GenReg::retype(address, GEN_TYPE_UD));
-      p->MOV(GenReg::ud16grf(114, 0), GenReg::retype(value, GEN_TYPE_UD));
-      p->UNTYPED_WRITE(GenReg::vec16grf(112, 0), 0, 1);
-    } else
-      NOT_IMPLEMENTED;
-
-#endif
    }
    void GenContext::emitFenceInstruction(const ir::FenceInstruction &insn) {}
    void GenContext::emitLabelInstruction(const ir::LabelInstruction &insn) {}
diff --git a/backend/src/backend/gen_eu.hpp b/backend/src/backend/gen_eu.hpp

index 583cb80..2fa8abc 100644 (file)
--- a/backend/src/backend/gen_eu.hpp
+++ b/backend/src/backend/gen_eu.hpp
@@ -141,38 +141,38 @@ namespace gbe
  
      static INLINE GenReg vec16(uint32_t file, uint32_t nr, uint32_t subnr) {
        return GenReg(file,
-          nr,
-          subnr,
-          GEN_TYPE_F,
-          GEN_VERTICAL_STRIDE_8,
-          GEN_WIDTH_8,
-          GEN_HORIZONTAL_STRIDE_1,
-          GEN_SWIZZLE_XYZW,
-          WRITEMASK_XYZW);
+                    nr,
+                    subnr,
+                    GEN_TYPE_F,
+                    GEN_VERTICAL_STRIDE_8,
+                    GEN_WIDTH_8,
+                    GEN_HORIZONTAL_STRIDE_1,
+                    GEN_SWIZZLE_XYZW,
+                    WRITEMASK_XYZW);
      }
  
      static INLINE GenReg vec8(uint32_t file, uint32_t nr, uint32_t subnr) {
        return GenReg(file,
-          nr,
-          subnr,
-          GEN_TYPE_F,
-          GEN_VERTICAL_STRIDE_8,
-          GEN_WIDTH_8,
-          GEN_HORIZONTAL_STRIDE_1,
-          GEN_SWIZZLE_XYZW,
-          WRITEMASK_XYZW);
+                    nr,
+                    subnr,
+                    GEN_TYPE_F,
+                    GEN_VERTICAL_STRIDE_8,
+                    GEN_WIDTH_8,
+                    GEN_HORIZONTAL_STRIDE_1,
+                    GEN_SWIZZLE_XYZW,
+                    WRITEMASK_XYZW);
      }
  
      static INLINE GenReg vec4(uint32_t file, uint32_t nr, uint32_t subnr) {
        return GenReg(file,
-          nr,
-          subnr,
-          GEN_TYPE_F,
-          GEN_VERTICAL_STRIDE_4,
-          GEN_WIDTH_4,
-          GEN_HORIZONTAL_STRIDE_1,
-          GEN_SWIZZLE_XYZW,
-          WRITEMASK_XYZW);
+                    nr,
+                    subnr,
+                    GEN_TYPE_F,
+                    GEN_VERTICAL_STRIDE_4,
+                    GEN_WIDTH_4,
+                    GEN_HORIZONTAL_STRIDE_1,
+                    GEN_SWIZZLE_XYZW,
+                    WRITEMASK_XYZW);
      }
  
      static INLINE GenReg vec2(uint32_t file, uint32_t nr, uint32_t subnr) {
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h

index 56de46a..64020be 100644 (file)
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -71,7 +71,8 @@ enum gbe_curbe_type {
    GBE_CURBE_IMAGE_WIDTH,
    GBE_CURBE_IMAGE_HEIGHT,
    GBE_CURBE_IMAGE_DEPTH,
-  GBE_CURBE_KERNEL_ARGUMENT
+  GBE_CURBE_KERNEL_ARGUMENT,
+  GBE_CURBE_BLOCK_IP
  };
  
  /*! Create a new program from the given source code (zero terminated string) */
diff --git a/backend/src/backend/sim_context.cpp b/backend/src/backend/sim_context.cpp

index 04176f5..8b43c1f 100644 (file)
--- a/backend/src/backend/sim_context.cpp
+++ b/backend/src/backend/sim_context.cpp
@@ -55,6 +55,7 @@ namespace gbe
          usedRegs.insert(insn.getDst(dstID));
      });
  
+    // Declare register variables
      const uint32_t regNum = fn.regNum();
      bool lid0 = false, lid1 = false, lid2 = false; // for local id registers
      for (uint32_t regID = 0; regID < regNum; ++regID) {
@@ -276,8 +277,8 @@ namespace gbe
  #undef LOAD_SPECIAL_REG
  
    SVAR(OCL_GCC_SIM_COMPILER, "gcc");
-  SVAR(OCL_GCC_SIM_COMPILER_OPTIONS, "-Wall -fPIC -shared -msse -msse2 -msse3 -mssse3 -msse4.1 -g -O3");
    SVAR(OCL_ICC_SIM_COMPILER, "icc");
+  SVAR(OCL_GCC_SIM_COMPILER_OPTIONS, "-Wall -fPIC -shared -msse -msse2 -msse3 -mssse3 -msse4.1 -g -O3");
    SVAR(OCL_ICC_SIM_COMPILER_OPTIONS, "-Wall -ldl -fabi-version=2 -fPIC -shared -O3 -g");
    BVAR(OCL_USE_ICC, false);
  
diff --git a/backend/src/backend/sim_context.hpp b/backend/src/backend/sim_context.hpp

index 61fcb43..832cd23 100644 (file)
--- a/backend/src/backend/sim_context.hpp
+++ b/backend/src/backend/sim_context.hpp
@@ -54,6 +54,8 @@ namespace gbe
      void emitCurbeLoad(void);
      /*! Emit the masking code (mask / UIP) */
      void emitMaskingCode(void);
+    /*! Emit the instructions */
+    void emitInstructionStream(void);
      /*! Implements base class */
      virtual Kernel *allocateKernel(void);
      std::ofstream o; //!< Where to output the c++ string
author	Benjamin Segovia <segovia.benjamin@gmail.com>
	Wed, 25 Apr 2012 20:30:48 +0000 (20:30 +0000)
committer	Keith Packard <keithp@keithp.com>
	Fri, 10 Aug 2012 23:16:44 +0000 (16:16 -0700)
backend/src/backend/context.cpp		patch \| blob \| history
backend/src/backend/gen_context.cpp		patch \| blob \| history
backend/src/backend/gen_eu.hpp		patch \| blob \| history
backend/src/backend/program.h		patch \| blob \| history
backend/src/backend/sim_context.cpp		patch \| blob \| history
backend/src/backend/sim_context.hpp		patch \| blob \| history