Played with SIMD8

author Benjamin Segovia <segovia.benjamin@gmail.com>

Wed, 25 Apr 2012 22:37:42 +0000 (22:37 +0000)

committer Keith Packard <keithp@keithp.com>

Fri, 10 Aug 2012 23:16:46 +0000 (16:16 -0700)
author Benjamin Segovia <segovia.benjamin@gmail.com>
Wed, 25 Apr 2012 22:37:42 +0000 (22:37 +0000)
committer Keith Packard <keithp@keithp.com>
Fri, 10 Aug 2012 23:16:46 +0000 (16:16 -0700)
diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp

index 85f3e92..be3b6c1 100644 (file)
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -51,6 +51,7 @@ namespace gbe
  
    Kernel *Context::compileKernel(void) {
      this->kernel = this->allocateKernel();
+    this->kernel->simdWidth = this->simdWidth;
      this->buildPatchList();
      this->buildArgList();
      this->buildUsedLabels();
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp

index 93ccd32..e3019fb 100644 (file)
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -38,9 +38,12 @@ namespace gbe
      p = GBE_NEW(GenEmitter, simdWidth, 7); // XXX handle more than gen7
    }
  
-
    GenContext::~GenContext(void) { GBE_DELETE(p); }
  
+  // Per-lane block IPs are always pre-allocated and used for branches. We just
+  // use 0xffff as a fake register for them
+  static const ir::Register blockIP(0xffff);
+
    void GenContext::allocatePayloadReg(gbe_curbe_type value,
                                        uint32_t subValue,
                                        const ir::Register &reg)
@@ -53,11 +56,11 @@ namespace gbe
        const uint32_t subnr = ((offset + GEN_REG_SIZE) % GEN_REG_SIZE) / typeSize;
        GBE_ASSERT(data.family == ir::FAMILY_DWORD); // XXX support the rest
        if (this->isScalarReg(reg) == true)
-        RA.insert(std::make_pair(reg, GenReg::vec1(GEN_GENERAL_REGISTER_FILE, nr, subnr)));
+        RA.insert(std::make_pair(reg, GenReg::f1grf(nr, subnr)));
        else if (this->simdWidth == 8)
-        RA.insert(std::make_pair(reg, GenReg::vec8(GEN_GENERAL_REGISTER_FILE, nr, subnr)));
+        RA.insert(std::make_pair(reg, GenReg::f8grf(nr, subnr)));
        else if (this->simdWidth == 16)
-        RA.insert(std::make_pair(reg, GenReg::vec16(GEN_GENERAL_REGISTER_FILE, nr, subnr)));
+        RA.insert(std::make_pair(reg, GenReg::f16grf(nr, subnr)));
      }
    }
  
@@ -83,9 +86,20 @@ namespace gbe
      allocatePayloadReg(GBE_CURBE_GROUP_NUM_Z, 0, ocl::numgroup2);
  
      // Group IDs are always allocated by the hardware in r0
-    RA.insert(std::make_pair(ocl::groupid0, GenReg::vec1(GEN_GENERAL_REGISTER_FILE, 0, 1)));
-    RA.insert(std::make_pair(ocl::groupid1, GenReg::vec1(GEN_GENERAL_REGISTER_FILE, 0, 6)));
-    RA.insert(std::make_pair(ocl::groupid2, GenReg::vec1(GEN_GENERAL_REGISTER_FILE, 0, 7)));
+    RA.insert(std::make_pair(ocl::groupid0, GenReg::f1grf(0, 1)));
+    RA.insert(std::make_pair(ocl::groupid1, GenReg::f1grf(0, 6)));
+    RA.insert(std::make_pair(ocl::groupid2, GenReg::f1grf(0, 7)));
+
+    // block IP used to handle the mask in SW is always allocated
+    int32_t blockIPOffset = kernel->getCurbeOffset(GBE_CURBE_BLOCK_IP,0);
+    GBE_ASSERT(blockIPOffset >= 0 && blockIPOffset % GEN_REG_SIZE == 0);
+    blockIPOffset /= GEN_REG_SIZE;
+    if (simdWidth == 8)
+      RA.insert(std::make_pair(blockIP, GenReg::uw8grf(blockIPOffset, 0)));
+    else if (simdWidth == 16)
+      RA.insert(std::make_pair(blockIP, GenReg::uw16grf(blockIPOffset, 0)));
+    else
+      NOT_SUPPORTED;
  
      // Allocate all input parameters
      const uint32_t inputNum = fn.inputNum();
@@ -107,12 +121,9 @@ namespace gbe
      });
  
      // Allocate all used registers. Just crash when we run out-of-registers
-    // r0 is always taken by the HW. We also always write down local IDs after
-    // the curbe data
-    uint32_t grfOffset = kernel->getCurbeSize() + GEN_REG_SIZE
-                       + 3 * sizeof(uint32_t) * this->simdWidth;
+    uint32_t grfOffset = kernel->getCurbeSize() + GEN_REG_SIZE;
      GBE_ASSERT(simdWidth != 32); // XXX a bit more complicated see later
-    if (simdWidth == 16) grfOffset = ALIGN(grfOffset, 64);
+    if (simdWidth == 16) grfOffset = ALIGN(grfOffset, 2*GEN_REG_SIZE);
      for (auto reg : usedRegs) {
        if (fn.isSpecialReg(reg) == true) continue; // already done
        if (fn.getInput(reg) != NULL) continue; // already done
@@ -123,7 +134,7 @@ namespace gbe
        const uint32_t subnr = (grfOffset % GEN_REG_SIZE) / typeSize;
        GBE_ASSERT(family == FAMILY_DWORD); // XXX Do the rest
        GBE_ASSERT(grfOffset + simdWidth*typeSize < GEN_GRF_SIZE);
-      RA.insert(std::make_pair(reg, GenReg::vec16(GEN_GENERAL_REGISTER_FILE, nr, subnr)));
+      RA.insert(std::make_pair(reg, GenReg::f16grf(nr, subnr)));
        grfOffset += simdWidth * typeSize;
      }
    }
@@ -159,15 +170,15 @@ namespace gbe
      // Output the binary instruction
      switch (opcode) {
        case OP_ADD: p->ADD(dst, src0, src1); break;
+      case OP_SUB: p->ADD(dst, src0, GenReg::negate(src1)); break;
        case OP_MUL: 
        {
-    //    p->MUL(dst, src0, src1);
  #if 1
          if (type == TYPE_FLOAT)
            p->MUL(dst, src0, src1);
          else {
            const uint32_t width = p->curr.execWidth;
-          p->pushState();
+          p->push();
            p->curr.execWidth = 8;
            p->curr.quarterControl = GEN_COMPRESSION_Q1;
            p->MUL(GenReg::retype(GenReg::acc(), GEN_TYPE_D), src0, src1);
@@ -181,11 +192,11 @@ namespace gbe
              p->MUL(GenReg::retype(GenReg::acc(), GEN_TYPE_D), nextSrc0, nextSrc1);
              p->MACH(GenReg::retype(GenReg::null(), GEN_TYPE_D), nextSrc0, nextSrc1);
              p->curr.quarterControl = GEN_COMPRESSION_Q2;
-            p->MOV(GenReg::d8grf(116, 0), GenReg::retype(GenReg::acc(), GEN_TYPE_D));
+            p->MOV(GenReg::d8grf(116,0), GenReg::retype(GenReg::acc(), GEN_TYPE_D));
              p->curr.noMask = 0;
-            p->MOV(GenReg::next(dst), GenReg::d8grf(116, 0));
+            p->MOV(GenReg::next(dst), GenReg::d8grf(116,0));
            }
-          p->popState();
+          p->pop();
  
          }
  #endif
@@ -199,7 +210,17 @@ namespace gbe
    void GenContext::emitSelectInstruction(const ir::SelectInstruction &insn) {}
    void GenContext::emitCompareInstruction(const ir::CompareInstruction &insn) {}
    void GenContext::emitConvertInstruction(const ir::ConvertInstruction &insn) {}
-  void GenContext::emitBranchInstruction(const ir::BranchInstruction &insn) {}
+  void GenContext::emitBranchInstruction(const ir::BranchInstruction &insn) {
+    using namespace ir;
+    const Opcode opcode = insn.getOpcode();
+    GBE_ASSERT(opcode == OP_RET);
+    p->push();
+    p->curr.execWidth = 8;
+    p->curr.noMask = 1;
+    p->MOV(GenReg::f8grf(127,0), GenReg::f8grf(0,0));
+    p->pop();
+    p->EOT(127);
+  }
    void GenContext::emitTextureInstruction(const ir::TextureInstruction &insn) {}
  
    void GenContext::emitLoadImmInstruction(const ir::LoadImmInstruction &insn) {
@@ -243,13 +264,13 @@ namespace gbe
      // XXX remove that later. Now we just copy everything to GRFs to make it
      // contiguous
      if (this->simdWidth == 8) {
-      p->MOV(GenReg::vec8grf(112, 0), GenReg::retype(address, GEN_TYPE_F));
-      p->MOV(GenReg::vec8grf(113, 0), GenReg::retype(value, GEN_TYPE_F));
-      p->UNTYPED_WRITE(GenReg::vec8grf(112, 0), 0, 1);
+      p->MOV(GenReg::f8grf(112, 0), GenReg::retype(address, GEN_TYPE_F));
+      p->MOV(GenReg::f8grf(113, 0), GenReg::retype(value, GEN_TYPE_F));
+      p->UNTYPED_WRITE(GenReg::f8grf(112, 0), 0, 1);
      } else if (this->simdWidth == 16) {
-      p->MOV(GenReg::vec16grf(112, 0), GenReg::retype(address, GEN_TYPE_F));
-      p->MOV(GenReg::vec16grf(114, 0), GenReg::retype(value, GEN_TYPE_F));
-      p->UNTYPED_WRITE(GenReg::vec16grf(112, 0), 0, 1);
+      p->MOV(GenReg::f16grf(112, 0), GenReg::retype(address, GEN_TYPE_F));
+      p->MOV(GenReg::f16grf(114, 0), GenReg::retype(value, GEN_TYPE_F));
+      p->UNTYPED_WRITE(GenReg::f16grf(112, 0), 0, 1);
      } else
        NOT_IMPLEMENTED;
    }
@@ -274,7 +295,6 @@ namespace gbe
      GenKernel *genKernel = static_cast<GenKernel*>(this->kernel);
      this->allocateRegister();
      this->emitInstructionStream();
-    p->EOT(127);
      genKernel->insnNum = p->insnNum;
      genKernel->insns = GBE_NEW_ARRAY(GenInstruction, genKernel->insnNum);
      std::memcpy(genKernel->insns, p->store, genKernel->insnNum * sizeof(GenInstruction));
diff --git a/backend/src/backend/gen_eu.cpp b/backend/src/backend/gen_eu.cpp

index ad12dff..2d76ba4 100644 (file)
--- a/backend/src/backend/gen_eu.cpp
+++ b/backend/src/backend/gen_eu.cpp
@@ -591,8 +591,8 @@ namespace gbe
    void GenEmitter::NOP(void)
    {
      GenInstruction *insn = this->next(GEN_OPCODE_NOP);
-    this->setDst(insn, GenReg::retype(GenReg::vec4grf(0,0), GEN_TYPE_UD));
-    this->setSrc0(insn, GenReg::retype(GenReg::vec4grf(0,0), GEN_TYPE_UD));
+    this->setDst(insn, GenReg::retype(GenReg::f4grf(0,0), GEN_TYPE_UD));
+    this->setSrc0(insn, GenReg::retype(GenReg::f4grf(0,0), GEN_TYPE_UD));
      this->setSrc1(insn, GenReg::immud(0x0));
    }
  
@@ -767,18 +767,13 @@ namespace gbe
                           return_format);
    }
  
-  void GenEmitter::EOT(uint32_t msg_nr)
+  void GenEmitter::EOT(uint32_t msg)
    {
      GenInstruction *insn = NULL;
  
-    this->pushState();
-    this->curr.execWidth = 8;
-    insn = this->MOV(GenReg::vec8grf(msg_nr,0), GenReg::vec8grf(0,0));
-    this->popState();
-    insn->header.mask_control = GEN_MASK_DISABLE;
      insn = this->next(GEN_OPCODE_SEND);
      this->setDst(insn, GenReg::retype(GenReg::null(), GEN_TYPE_UD));
-    this->setSrc0(insn, GenReg::ud8grf(msg_nr,0));
+    this->setSrc0(insn, GenReg::ud8grf(msg,0));
      this->setSrc1(insn, GenReg::immud(0));
      insn->header.execution_size = GEN_WIDTH_8;
      insn->bits3.spawner_gen5.resource = GEN_DO_NOT_DEREFERENCE_URB;
diff --git a/backend/src/backend/gen_eu.hpp b/backend/src/backend/gen_eu.hpp

index 2fa8abc..55147d1 100644 (file)
--- a/backend/src/backend/gen_eu.hpp
+++ b/backend/src/backend/gen_eu.hpp
@@ -177,26 +177,26 @@ namespace gbe
  
      static INLINE GenReg vec2(uint32_t file, uint32_t nr, uint32_t subnr) {
        return GenReg(file,
-          nr,
-          subnr,
-          GEN_TYPE_F,
-          GEN_VERTICAL_STRIDE_2,
-          GEN_WIDTH_2,
-          GEN_HORIZONTAL_STRIDE_1,
-          GEN_SWIZZLE_XYXY,
-          WRITEMASK_XY);
+                    nr,
+                    subnr,
+                    GEN_TYPE_F,
+                    GEN_VERTICAL_STRIDE_2,
+                    GEN_WIDTH_2,
+                    GEN_HORIZONTAL_STRIDE_1,
+                    GEN_SWIZZLE_XYXY,
+                    WRITEMASK_XY);
      }
  
      static INLINE GenReg vec1(uint32_t file, uint32_t nr, uint32_t subnr) {
        return GenReg(file,
-          nr,
-          subnr,
-          GEN_TYPE_F,
-          GEN_VERTICAL_STRIDE_0,
-          GEN_WIDTH_1,
-          GEN_HORIZONTAL_STRIDE_0,
-          GEN_SWIZZLE_XXXX,
-          WRITEMASK_X);
+                    nr,
+                    subnr,
+                    GEN_TYPE_F,
+                    GEN_VERTICAL_STRIDE_0,
+                    GEN_WIDTH_1,
+                    GEN_HORIZONTAL_STRIDE_0,
+                    GEN_SWIZZLE_XXXX,
+                    WRITEMASK_X);
      }
  
      static INLINE GenReg retype(GenReg reg, uint32_t type) {
@@ -326,23 +326,23 @@ namespace gbe
        return immuw(reg.nr * GEN_REG_SIZE + reg.subnr);
      }
  
-    static INLINE GenReg vec1grf(uint32_t nr, uint32_t subnr) {
+    static INLINE GenReg f1grf(uint32_t nr, uint32_t subnr) {
        return vec1(GEN_GENERAL_REGISTER_FILE, nr, subnr);
      }
  
-    static INLINE GenReg vec2grf(uint32_t nr, uint32_t subnr) {
+    static INLINE GenReg f2grf(uint32_t nr, uint32_t subnr) {
        return vec2(GEN_GENERAL_REGISTER_FILE, nr, subnr);
      }
  
-    static INLINE GenReg vec4grf(uint32_t nr, uint32_t subnr) {
+    static INLINE GenReg f4grf(uint32_t nr, uint32_t subnr) {
        return vec4(GEN_GENERAL_REGISTER_FILE, nr, subnr);
      }
  
-    static INLINE GenReg vec8grf(uint32_t nr, uint32_t subnr) {
+    static INLINE GenReg f8grf(uint32_t nr, uint32_t subnr) {
        return vec8(GEN_GENERAL_REGISTER_FILE, nr, subnr);
      }
  
-    static INLINE GenReg vec16grf(uint32_t nr, uint32_t subnr) {
+    static INLINE GenReg f16grf(uint32_t nr, uint32_t subnr) {
        return vec16(GEN_GENERAL_REGISTER_FILE, nr, subnr);
      }
  
@@ -465,7 +465,7 @@ namespace gbe
      }
  
      static INLINE GenReg vec4_indirect(uint32_t subnr, int offset) {
-      GenReg reg =  vec4grf(0, 0);
+      GenReg reg =  f4grf(0, 0);
        reg.subnr = subnr;
        reg.address_mode = GEN_ADDRESS_REGISTER_INDIRECT_REGISTER;
        reg.dw1.bits.indirect_offset = offset;
@@ -473,7 +473,7 @@ namespace gbe
      }
  
      static INLINE GenReg vec1_indirect(uint32_t subnr, int offset) {
-      GenReg reg =  vec1grf(0, 0);
+      GenReg reg =  f1grf(0, 0);
        reg.subnr = subnr;
        reg.address_mode = GEN_ADDRESS_REGISTER_INDIRECT_REGISTER;
        reg.dw1.bits.indirect_offset = offset;
@@ -527,12 +527,12 @@ namespace gbe
      /*! Size of the stack (should be large enough) */
      enum { MAX_STATE_NUM = 16 };
      /*! Push the current instruction state */
-    INLINE void pushState(void) {
+    INLINE void push(void) {
        assert(stateNum < MAX_STATE_NUM);
        stack[stateNum++] = curr;
      }
      /*! Pop the latest pushed state */
-    INLINE void popState(void) {
+    INLINE void pop(void) {
        assert(stateNum > 0);
        curr = stack[--stateNum];
      }
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp

index 9ec86f3..c049dba 100644 (file)
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -171,8 +171,10 @@ namespace gbe {
      return kernel->getArgType(argID);
    }
  
-  static uint32_t kernelGetSIMDWidth(gbe_kernel kernel) {
-    return 16u;
+  static uint32_t kernelGetSIMDWidth(gbe_kernel genKernel) {
+    if (genKernel == NULL) return GBE_ARG_INVALID;
+    const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel;
+    return kernel->getSIMDWidth();
    }
  
    static int32_t kernelGetCurbeOffset(gbe_kernel genKernel, gbe_curbe_type type, uint32_t subType) {
diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp

index ecd462e..7d358a4 100644 (file)
--- a/backend/src/backend/program.hpp
+++ b/backend/src/backend/program.hpp
@@ -87,6 +87,8 @@ namespace gbe {
      int32_t getCurbeOffset(gbe_curbe_type type, uint32_t subType) const;
      /*! Get the curbe size required by the kernel */
      uint32_t getCurbeSize(void) const { return this->curbeSize; }
+    /*! Get the SIMD width for the kernel */
+    uint32_t getSIMDWidth(void) const { return this->simdWidth; }
    protected:
      friend class Context;       //!< Owns the kernels
      const std::string name;     //!< Kernel name
@@ -94,6 +96,7 @@ namespace gbe {
      uint32_t argNum;            //!< Number of function arguments
      vector<PatchInfo> patches;  //!< Indicates how to build the curbe
      uint32_t curbeSize;         //!< Size of the data to push
+    uint32_t simdWidth;         //!< SIMD size for the kernel
    };
  
    /*! Describe a compiled program */
diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp

index 403855d..5e15574 100644 (file)
--- a/backend/src/ir/profile.cpp
+++ b/backend/src/ir/profile.cpp
@@ -31,11 +31,11 @@ namespace ir {
    namespace ocl
    {
  #if GBE_DEBUG
-#define DECL_NEW_REG(FAMILY, REG)       \
-   r = fn.newRegister(FAMILY_DWORD);    \
+#define DECL_NEW_REG(FAMILY, REG) \
+   r = fn.newRegister(FAMILY_DWORD); \
     GBE_ASSERT(r == REG);
  #else
-#define DECL_NEW_REG(FAMILY, REG)       \
+#define DECL_NEW_REG(FAMILY, REG) \
     fn.newRegister(FAMILY_DWORD);
  #endif /* GBE_DEBUG */
      static void init(Function &fn) {
author	Benjamin Segovia <segovia.benjamin@gmail.com>
	Wed, 25 Apr 2012 22:37:42 +0000 (22:37 +0000)
committer	Keith Packard <keithp@keithp.com>
	Fri, 10 Aug 2012 23:16:46 +0000 (16:16 -0700)
backend/src/backend/context.cpp		patch \| blob \| history
backend/src/backend/gen_context.cpp		patch \| blob \| history
backend/src/backend/gen_eu.cpp		patch \| blob \| history
backend/src/backend/gen_eu.hpp		patch \| blob \| history
backend/src/backend/program.cpp		patch \| blob \| history
backend/src/backend/program.hpp		patch \| blob \| history
backend/src/ir/profile.cpp		patch \| blob \| history