Made the first kernels work with the simulators Added some debug variables

author Benjamin Segovia <segovia.benjamin@gmail.com>

Fri, 13 Apr 2012 18:41:56 +0000 (18:41 +0000)

committer Keith Packard <keithp@keithp.com>

Fri, 10 Aug 2012 23:16:23 +0000 (16:16 -0700)
author Benjamin Segovia <segovia.benjamin@gmail.com>
Fri, 13 Apr 2012 18:41:56 +0000 (18:41 +0000)
committer Keith Packard <keithp@keithp.com>
Fri, 10 Aug 2012 23:16:23 +0000 (16:16 -0700)
diff --git a/backend/kernels/stdlib.h b/backend/kernels/stdlib.h

index 472655a..eaf4b17 100644 (file)
--- a/backend/kernels/stdlib.h
+++ b/backend/kernels/stdlib.h
@@ -43,7 +43,7 @@ DECL_PUBLIC_WORK_ITEM_FN(get_num_groups)
  #undef DECL_PUBLIC_WORK_ITEM_FN
  
  inline unsigned int get_global_id(unsigned int dim) {
-  return get_local_id(dim) + get_local_size(dim) * get_num_groups(dim);
+  return get_local_id(dim) + get_local_size(dim) * get_group_id(dim);
  }
  
  __attribute__ ((pure,const,overloadable)) float mad(float a, float b, float c);
diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt

index 7ee1714..db8beb7 100644 (file)
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -42,6 +42,9 @@ else (GBE_USE_BLOB)
      sys/condition.hpp
      sys/platform.cpp
      sys/platform.hpp
+    sys/debug.cpp
+    sys/debug.hpp
+    sys/debug.hxx
      ir/context.cpp
      ir/context.hpp
      ir/profile.cpp
diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp

index 16b8fad..7f6ac89 100644 (file)
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -26,16 +26,24 @@
  #include "ir/unit.hpp"
  #include "ir/function.hpp"
  #include "ir/profile.hpp"
+#include "ir/liveness.hpp"
+#include "ir/value.hpp"
  #include <algorithm>
  
  namespace gbe
  {
    Context::Context(const ir::Unit &unit, const std::string &name) :
      unit(unit), fn(*unit.getFunction(name)), name(name), liveness(NULL), dag(NULL)
-  { GBE_ASSERT(unit.getPointerSize() == ir::POINTER_32_BITS);
+  {
+    GBE_ASSERT(unit.getPointerSize() == ir::POINTER_32_BITS);
+    this->liveness = GBE_NEW(ir::Liveness, (ir::Function&) fn);
+    this->dag = GBE_NEW(ir::FunctionDAG, *this->liveness);
      this->simdWidth = 16; /* XXX environment variable for that to start with */
    }
-  Context::~Context(void) {}
+  Context::~Context(void) {
+    GBE_SAFE_DELETE(this->dag);
+    GBE_SAFE_DELETE(this->liveness);
+  }
  
    Kernel *Context::compileKernel(void) {
      this->kernel = this->allocateKernel();
@@ -77,7 +85,7 @@ namespace gbe
        for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
          const ir::Register reg = insn.getSrc(srcID);
          if (fn.isSpecialReg(reg) == false) continue;
-
+        if (specialRegs.contains(reg) == true) continue;
          INSERT_REG(lsize0, LOCAL_SIZE_X)
          INSERT_REG(lsize1, LOCAL_SIZE_Y)
          INSERT_REG(lsize2, LOCAL_SIZE_Z)
@@ -90,10 +98,21 @@ namespace gbe
          INSERT_REG(numgroup0, GROUP_NUM_X)
          INSERT_REG(numgroup1, GROUP_NUM_Y)
          INSERT_REG(numgroup2, GROUP_NUM_Z);
+        specialRegs.insert(reg);
        }
      });
+    kernel->curbeSize = ALIGN(kernel->curbeSize, 32);
+
+    // Local IDs always go at the end of the curbe
+    const size_t localIDSize = sizeof(uint32_t) * this->simdWidth;
+    const PatchInfo lid0(GBE_CURBE_LOCAL_ID_X, 0, kernel->curbeSize+0*localIDSize);
+    const PatchInfo lid1(GBE_CURBE_LOCAL_ID_Y, 0, kernel->curbeSize+1*localIDSize);
+    const PatchInfo lid2(GBE_CURBE_LOCAL_ID_Z, 0, kernel->curbeSize+2*localIDSize);
+    kernel->patches.push_back(lid0);
+    kernel->patches.push_back(lid1);
+    kernel->patches.push_back(lid2);
  
-    // After this point the vector is immutable. so, Sorting it will make
+    // After this point the vector is immutable. Sorting it will make
      // research faster
      std::sort(kernel->patches.begin(), kernel->patches.end());
    }
diff --git a/backend/src/backend/sim/sim_vector.h b/backend/src/backend/sim/sim_vector.h

index cd9b5e6..3fb8597 100644 (file)
--- a/backend/src/backend/sim/sim_vector.h
+++ b/backend/src/backend/sim/sim_vector.h
@@ -157,6 +157,10 @@ INLINE void NAME(DST_TYPE &dst, const SRC_TYPE &v0, const scalar_dw &v1) {\
  template <uint32_t vectorNum>\
  INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const SRC_TYPE &v1) {\
    NAME(dst, simd_dw<vectorNum>(v0), v1);\
+}\
+template <uint32_t vectorNum>\
+INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const scalar_dw &v1) {\
+  NAME(dst, simd_dw<vectorNum>(v0), simd_dw<vectorNum>(v1));\
  }
  VEC_OP(simd_dw<vectorNum>, simd_dw<vectorNum>, ADD_F, _mm_add_ps, ID, ID, ID);
  VEC_OP(simd_dw<vectorNum>, simd_dw<vectorNum>, SUB_F, _mm_sub_ps, ID, ID, ID);
@@ -192,6 +196,10 @@ INLINE void NAME(DST_TYPE &dst, const SRC_TYPE &v0, const scalar_dw &v1) {\
  template <uint32_t vectorNum>\
  INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const SRC_TYPE &v1) {\
    NAME(dst, simd_dw<vectorNum>(v0), v1);\
+}\
+template <uint32_t vectorNum>\
+INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const scalar_dw &v1) {\
+  NAME(dst, simd_dw<vectorNum>(v0), simd_dw<vectorNum>(v1));\
  }
  VEC_OP(simd_m<vectorNum>, simd_dw<vectorNum>, GE_S32, _mm_cmplt_epi32, SI2PS, PS2SI, PS2SI);
  VEC_OP(simd_m<vectorNum>, simd_dw<vectorNum>, LE_S32, _mm_cmpgt_epi32, SI2PS, PS2SI, PS2SI);
@@ -215,6 +223,10 @@ INLINE void NAME(DST_TYPE &dst, const SRC_TYPE &v0, const scalar_dw &v1) {\
  template <uint32_t vectorNum>\
  INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const SRC_TYPE &v1) {\
    NAME(dst, simd_dw<vectorNum>(v0), v1);\
+}\
+template <uint32_t vectorNum>\
+INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const scalar_dw &v1) {\
+  NAME(dst, simd_dw<vectorNum>(v0), simd_dw<vectorNum>(v1));\
  }
  VEC_OP(simd_dw<vectorNum>, simd_dw<vectorNum>, MUL_S32, *, s);
  VEC_OP(simd_dw<vectorNum>, simd_dw<vectorNum>, DIV_S32, /, s);
@@ -262,24 +274,24 @@ VEC_OP(simd_m<vectorNum>, simd_dw<vectorNum>, GT_U32, >, u);
  template <uint32_t vectorNum>
  INLINE void NE_S32(simd_m<vectorNum> &dst,
                     const simd_dw<vectorNum> &v0,
-                   const scalar_dw &v1)
+                   const simd_dw<vectorNum> &v1)
  {
-  NE_S32(dst, v0, simd_dw<vectorNum>(v1));
+  for (uint32_t i = 0; i < vectorNum; ++i)
+    dst.m[i] = _mm_xor_ps(alltrue.v, SI2PS(_mm_cmpeq_epi32(PS2SI(v0.m[i]), PS2SI(v1.m[i]))));
  }
  template <uint32_t vectorNum>
  INLINE void NE_S32(simd_m<vectorNum> &dst,
-                   const scalar_dw &v0,
-                   const simd_dw<vectorNum> &v1)
+                   const simd_dw<vectorNum> &v0,
+                   const scalar_dw &v1)
  {
-  NE_S32(dst, simd_dw<vectorNum>(v0), v1);
+  NE_S32(dst, v0, simd_dw<vectorNum>(v1));
  }
  template <uint32_t vectorNum>
  INLINE void NE_S32(simd_m<vectorNum> &dst,
-                   const simd_dw<vectorNum> &v0,
+                   const scalar_dw &v0,
                     const simd_dw<vectorNum> &v1)
  {
-  for (uint32_t i = 0; i < vectorNum; ++i)
-    dst.m[i] = _mm_xor_ps(alltrue.v, SI2PS(_mm_cmpeq_epi32(PS2SI(v0.m[i]), PS2SI(v1.m[i]))));
+  NE_S32(dst, simd_dw<vectorNum>(v0), v1);
  }
  
  /* Load from contiguous double words */
@@ -298,25 +310,28 @@ INLINE void STORE(const simd_dw<vectorNum> &src, char *ptr) {
  
  /* Load immediates */
  template <uint32_t vectorNum>
-INLINE void LOADI(simd_dw<vectorNum> &dst, float f) {
+INLINE void LOADI(simd_dw<vectorNum> &dst, uint32_t u) {
+  union { uint32_t u; float f; } cast;
+  cast.u = u;
    for (uint32_t i = 0; i < vectorNum; ++i)
-    dst.m[i] = _mm_load1_ps(&f);
+    dst.m[i] = _mm_load1_ps(&cast.f);
  }
  
+#include <cstdio>
  /* Scatter */
  template <uint32_t vectorNum>
-INLINE void SCATTER(const simd_dw<vectorNum> &value,
-                    const simd_dw<vectorNum> &offset,
+INLINE void SCATTER(const simd_dw<vectorNum> &offset,
+                    const simd_dw<vectorNum> &value,
                      char *base_address) {
    for (uint32_t i = 0; i < vectorNum; ++i) {
      const int v0 = _mm_extract_epi32(PS2SI(value.m[i]), 0);
      const int v1 = _mm_extract_epi32(PS2SI(value.m[i]), 1);
      const int v2 = _mm_extract_epi32(PS2SI(value.m[i]), 2);
      const int v3 = _mm_extract_epi32(PS2SI(value.m[i]), 3);
-    const int o0 = _mm_extract_epi32(offset.m[i], 0);
-    const int o1 = _mm_extract_epi32(offset.m[i], 1);
-    const int o2 = _mm_extract_epi32(offset.m[i], 2);
-    const int o3 = _mm_extract_epi32(offset.m[i], 3);
+    const int o0 = _mm_extract_epi32(PS2SI(offset.m[i]), 0);
+    const int o1 = _mm_extract_epi32(PS2SI(offset.m[i]), 1);
+    const int o2 = _mm_extract_epi32(PS2SI(offset.m[i]), 2);
+    const int o3 = _mm_extract_epi32(PS2SI(offset.m[i]), 3);
      *(int*)(base_address + o0) = v0;
      *(int*)(base_address + o1) = v1;
      *(int*)(base_address + o2) = v2;
@@ -324,14 +339,14 @@ INLINE void SCATTER(const simd_dw<vectorNum> &value,
    }
  }
  template <uint32_t vectorNum>
-INLINE void SCATTER(const scalar_dw &value,
-                    const simd_dw<vectorNum> &offset,
+INLINE void SCATTER(const simd_dw<vectorNum> &offset,
+                    const scalar_dw &value,
                      char *base_address) {
    SCATTER(simd_dw<vectorNum>(value), offset, base_address);
  }
  template <uint32_t vectorNum>
-INLINE void SCATTER(const simd_dw<vectorNum> &value,
-                    const scalar_dw &offset,
+INLINE void SCATTER(const scalar_dw &offset,
+                    const simd_dw<vectorNum> &value,
                      char *base_address) {
    SCATTER(value, simd_dw<vectorNum>(offset), base_address);
  }
diff --git a/backend/src/backend/sim/sim_vector_str.cpp b/backend/src/backend/sim/sim_vector_str.cpp

index 2fbb7ed..ebe3607 100644 (file)
--- a/backend/src/backend/sim/sim_vector_str.cpp
+++ b/backend/src/backend/sim/sim_vector_str.cpp
@@ -183,6 +183,10 @@ std::string sim_vector_str =
  "template <uint32_t vectorNum>\\\n"
  "INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const SRC_TYPE &v1) {\\\n"
  "  NAME(dst, simd_dw<vectorNum>(v0), v1);\\\n"
+"}\\\n"
+"template <uint32_t vectorNum>\\\n"
+"INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const scalar_dw &v1) {\\\n"
+"  NAME(dst, simd_dw<vectorNum>(v0), simd_dw<vectorNum>(v1));\\\n"
  "}\n"
  "VEC_OP(simd_dw<vectorNum>, simd_dw<vectorNum>, ADD_F, _mm_add_ps, ID, ID, ID);\n"
  "VEC_OP(simd_dw<vectorNum>, simd_dw<vectorNum>, SUB_F, _mm_sub_ps, ID, ID, ID);\n"
@@ -218,6 +222,10 @@ std::string sim_vector_str =
  "template <uint32_t vectorNum>\\\n"
  "INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const SRC_TYPE &v1) {\\\n"
  "  NAME(dst, simd_dw<vectorNum>(v0), v1);\\\n"
+"}\\\n"
+"template <uint32_t vectorNum>\\\n"
+"INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const scalar_dw &v1) {\\\n"
+"  NAME(dst, simd_dw<vectorNum>(v0), simd_dw<vectorNum>(v1));\\\n"
  "}\n"
  "VEC_OP(simd_m<vectorNum>, simd_dw<vectorNum>, GE_S32, _mm_cmplt_epi32, SI2PS, PS2SI, PS2SI);\n"
  "VEC_OP(simd_m<vectorNum>, simd_dw<vectorNum>, LE_S32, _mm_cmpgt_epi32, SI2PS, PS2SI, PS2SI);\n"
@@ -241,6 +249,10 @@ std::string sim_vector_str =
  "template <uint32_t vectorNum>\\\n"
  "INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const SRC_TYPE &v1) {\\\n"
  "  NAME(dst, simd_dw<vectorNum>(v0), v1);\\\n"
+"}\\\n"
+"template <uint32_t vectorNum>\\\n"
+"INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const scalar_dw &v1) {\\\n"
+"  NAME(dst, simd_dw<vectorNum>(v0), simd_dw<vectorNum>(v1));\\\n"
  "}\n"
  "VEC_OP(simd_dw<vectorNum>, simd_dw<vectorNum>, MUL_S32, *, s);\n"
  "VEC_OP(simd_dw<vectorNum>, simd_dw<vectorNum>, DIV_S32, /, s);\n"
@@ -288,24 +300,24 @@ std::string sim_vector_str =
  "template <uint32_t vectorNum>\n"
  "INLINE void NE_S32(simd_m<vectorNum> &dst,\n"
  "                   const simd_dw<vectorNum> &v0,\n"
-"                   const scalar_dw &v1)\n"
+"                   const simd_dw<vectorNum> &v1)\n"
  "{\n"
-"  NE_S32(dst, v0, simd_dw<vectorNum>(v1));\n"
+"  for (uint32_t i = 0; i < vectorNum; ++i)\n"
+"    dst.m[i] = _mm_xor_ps(alltrue.v, SI2PS(_mm_cmpeq_epi32(PS2SI(v0.m[i]), PS2SI(v1.m[i]))));\n"
  "}\n"
  "template <uint32_t vectorNum>\n"
  "INLINE void NE_S32(simd_m<vectorNum> &dst,\n"
-"                   const scalar_dw &v0,\n"
-"                   const simd_dw<vectorNum> &v1)\n"
+"                   const simd_dw<vectorNum> &v0,\n"
+"                   const scalar_dw &v1)\n"
  "{\n"
-"  NE_S32(dst, simd_dw<vectorNum>(v0), v1);\n"
+"  NE_S32(dst, v0, simd_dw<vectorNum>(v1));\n"
  "}\n"
  "template <uint32_t vectorNum>\n"
  "INLINE void NE_S32(simd_m<vectorNum> &dst,\n"
-"                   const simd_dw<vectorNum> &v0,\n"
+"                   const scalar_dw &v0,\n"
  "                   const simd_dw<vectorNum> &v1)\n"
  "{\n"
-"  for (uint32_t i = 0; i < vectorNum; ++i)\n"
-"    dst.m[i] = _mm_xor_ps(alltrue.v, SI2PS(_mm_cmpeq_epi32(PS2SI(v0.m[i]), PS2SI(v1.m[i]))));\n"
+"  NE_S32(dst, simd_dw<vectorNum>(v0), v1);\n"
  "}\n"
  "\n"
  "/* Load from contiguous double words */\n"
@@ -324,25 +336,28 @@ std::string sim_vector_str =
  "\n"
  "/* Load immediates */\n"
  "template <uint32_t vectorNum>\n"
-"INLINE void LOADI(simd_dw<vectorNum> &dst, float f) {\n"
+"INLINE void LOADI(simd_dw<vectorNum> &dst, uint32_t u) {\n"
+"  union { uint32_t u; float f; } cast;\n"
+"  cast.u = u;\n"
  "  for (uint32_t i = 0; i < vectorNum; ++i)\n"
-"    dst.m[i] = _mm_load1_ps(&f);\n"
+"    dst.m[i] = _mm_load1_ps(&cast.f);\n"
  "}\n"
  "\n"
+"#include <cstdio>\n"
  "/* Scatter */\n"
  "template <uint32_t vectorNum>\n"
-"INLINE void SCATTER(const simd_dw<vectorNum> &value,\n"
-"                    const simd_dw<vectorNum> &offset,\n"
+"INLINE void SCATTER(const simd_dw<vectorNum> &offset,\n"
+"                    const simd_dw<vectorNum> &value,\n"
  "                    char *base_address) {\n"
  "  for (uint32_t i = 0; i < vectorNum; ++i) {\n"
  "    const int v0 = _mm_extract_epi32(PS2SI(value.m[i]), 0);\n"
  "    const int v1 = _mm_extract_epi32(PS2SI(value.m[i]), 1);\n"
  "    const int v2 = _mm_extract_epi32(PS2SI(value.m[i]), 2);\n"
  "    const int v3 = _mm_extract_epi32(PS2SI(value.m[i]), 3);\n"
-"    const int o0 = _mm_extract_epi32(offset.m[i], 0);\n"
-"    const int o1 = _mm_extract_epi32(offset.m[i], 1);\n"
-"    const int o2 = _mm_extract_epi32(offset.m[i], 2);\n"
-"    const int o3 = _mm_extract_epi32(offset.m[i], 3);\n"
+"    const int o0 = _mm_extract_epi32(PS2SI(offset.m[i]), 0);\n"
+"    const int o1 = _mm_extract_epi32(PS2SI(offset.m[i]), 1);\n"
+"    const int o2 = _mm_extract_epi32(PS2SI(offset.m[i]), 2);\n"
+"    const int o3 = _mm_extract_epi32(PS2SI(offset.m[i]), 3);\n"
  "    *(int*)(base_address + o0) = v0;\n"
  "    *(int*)(base_address + o1) = v1;\n"
  "    *(int*)(base_address + o2) = v2;\n"
@@ -350,14 +365,14 @@ std::string sim_vector_str =
  "  }\n"
  "}\n"
  "template <uint32_t vectorNum>\n"
-"INLINE void SCATTER(const scalar_dw &value,\n"
-"                    const simd_dw<vectorNum> &offset,\n"
+"INLINE void SCATTER(const simd_dw<vectorNum> &offset,\n"
+"                    const scalar_dw &value,\n"
  "                    char *base_address) {\n"
  "  SCATTER(simd_dw<vectorNum>(value), offset, base_address);\n"
  "}\n"
  "template <uint32_t vectorNum>\n"
-"INLINE void SCATTER(const simd_dw<vectorNum> &value,\n"
-"                    const scalar_dw &offset,\n"
+"INLINE void SCATTER(const scalar_dw &offset,\n"
+"                    const simd_dw<vectorNum> &value,\n"
  "                    char *base_address) {\n"
  "  SCATTER(value, simd_dw<vectorNum>(offset), base_address);\n"
  "}\n"
diff --git a/backend/src/backend/sim_context.cpp b/backend/src/backend/sim_context.cpp

index 49b5a85..fa2ea1f 100644 (file)
--- a/backend/src/backend/sim_context.cpp
+++ b/backend/src/backend/sim_context.cpp
@@ -44,12 +44,16 @@ namespace gbe
    void SimContext::emitRegisters(void) {
      GBE_ASSERT(fn.getProfile() == ir::PROFILE_OCL);
      const uint32_t regNum = fn.regNum();
+    bool lid0 = false, lid1 = false, lid2 = false; // for local id registers
      for (uint32_t regID = 0; regID < regNum; ++regID) {
        const ir::Register reg(regID);
        if (reg == ir::ocl::groupid0 ||
            reg == ir::ocl::groupid1 ||
            reg == ir::ocl::groupid2)
          continue;
+      if (reg == ir::ocl::lid0) lid0 = true;
+      if (reg == ir::ocl::lid1) lid1 = true;
+      if (reg == ir::ocl::lid2) lid2 = true;
        const ir::RegisterData regData = fn.getRegisterData(reg);
        switch (regData.family) {
          case ir::FAMILY_BOOL:
@@ -66,16 +70,148 @@ namespace gbe
          break;
        }
      }
+
+    // Always declare local IDs
+    if (lid0 == false) o << "scalar_dw _" << uint32_t(ir::ocl::lid0) << ";\n";
+    if (lid1 == false) o << "scalar_dw _" << uint32_t(ir::ocl::lid1) << ";\n";
+    if (lid2 == false) o << "scalar_dw _" << uint32_t(ir::ocl::lid2) << ";\n";
    }
  
-  void SimContext::loadCurbe(void) {
+#define LOAD_SPECIAL_REG(CURBE, REG) do {                                 \
+    const int32_t offset = kernel->getCurbeOffset(CURBE, 0);              \
+    if (offset >= 0)                                                      \
+      o << "LOAD(_" << uint32_t(REG) << ", curbe + " << offset << ");\n"; \
+  } while (0)
+
+  void SimContext::emitCurbeLoad(void) {
      // Right now curbe is only made of input argument stuff
      const uint32_t inputNum = fn.inputNum();
      for (uint32_t inputID = 0; inputID < inputNum; ++inputID) {
-
+      const ir::FunctionInput &input = fn.getInput(inputID);
+      const ir::Register reg = input.reg;
+      const int32_t offset = kernel->getCurbeOffset(GBE_CURBE_KERNEL_ARGUMENT, inputID);
+      // XXX add support for these items
+      GBE_ASSERT (input.type != ir::FunctionInput::VALUE &&
+                  input.type != ir::FunctionInput::STRUCTURE &&
+                  input.type != ir::FunctionInput::IMAGE &&
+                  input.type != ir::FunctionInput::LOCAL_POINTER);
+      GBE_ASSERT(offset >= 0);
+      o << "LOAD(_" << uint32_t(reg) << ", curbe + " << offset << ");\n";
      }
+
+    // We must now load the special registers needed by the kernel
+    LOAD_SPECIAL_REG(GBE_CURBE_LOCAL_ID_X, ir::ocl::lid0);
+    LOAD_SPECIAL_REG(GBE_CURBE_LOCAL_ID_Y, ir::ocl::lid1);
+    LOAD_SPECIAL_REG(GBE_CURBE_LOCAL_ID_Z, ir::ocl::lid2);
+    LOAD_SPECIAL_REG(GBE_CURBE_LOCAL_SIZE_X, ir::ocl::lsize0);
+    LOAD_SPECIAL_REG(GBE_CURBE_LOCAL_SIZE_Y, ir::ocl::lsize1);
+    LOAD_SPECIAL_REG(GBE_CURBE_LOCAL_SIZE_Z, ir::ocl::lsize2);
+    LOAD_SPECIAL_REG(GBE_CURBE_GLOBAL_SIZE_X, ir::ocl::gsize0);
+    LOAD_SPECIAL_REG(GBE_CURBE_GLOBAL_SIZE_Y, ir::ocl::gsize1);
+    LOAD_SPECIAL_REG(GBE_CURBE_GLOBAL_SIZE_Z, ir::ocl::gsize2);
+    LOAD_SPECIAL_REG(GBE_CURBE_GLOBAL_OFFSET_X, ir::ocl::goffset0);
+    LOAD_SPECIAL_REG(GBE_CURBE_GLOBAL_OFFSET_Y, ir::ocl::goffset1);
+    LOAD_SPECIAL_REG(GBE_CURBE_GLOBAL_OFFSET_Z, ir::ocl::goffset2);
+    LOAD_SPECIAL_REG(GBE_CURBE_GROUP_NUM_X, ir::ocl::numgroup0);
+    LOAD_SPECIAL_REG(GBE_CURBE_GROUP_NUM_Y, ir::ocl::numgroup1);
+    LOAD_SPECIAL_REG(GBE_CURBE_GROUP_NUM_Z, ir::ocl::numgroup2);
+  }
+
+  static const char *typeStr(const ir::Type &type) {
+    switch (type) {
+      case ir::TYPE_BOOL: return "M";
+      case ir::TYPE_S8:   return "S8";
+      case ir::TYPE_S16:  return "S16";
+      case ir::TYPE_S32:  return "S32";
+      case ir::TYPE_S64:  return "S64";
+      case ir::TYPE_U8:   return "U8";
+      case ir::TYPE_U16:  return "U16";
+      case ir::TYPE_U32:  return "U32";
+      case ir::TYPE_U64:  return "U64";
+      case ir::TYPE_HALF: return "HALF";
+      case ir::TYPE_FLOAT: return "F";
+      case ir::TYPE_DOUBLE: return "D";
+      default: NOT_IMPLEMENTED; return NULL;
+    };
+  }
+
+  void SimContext::emitInstructionStream(void) {
+    using namespace ir;
+    fn.foreachInstruction([&](const Instruction &insn) {
+      const char *opcodeStr = NULL;
+      const Opcode opcode = insn.getOpcode();
+#define DECL_INSN(OPCODE, FAMILY)                         \
+      case OP_##OPCODE:                                   \
+      if (opcode == OP_LOAD) opcodeStr = "GATHER";        \
+      else if (opcode == OP_STORE) opcodeStr = "SCATTER"; \
+      else opcodeStr = #OPCODE;                           \
+      break;
+      switch (opcode) {
+        #include "ir/instruction.hxx"
+      default: NOT_IMPLEMENTED;
+#undef DECL_INSN
+      }
+      if (opcode == OP_LABEL) {
+        o << "label" << cast<LabelInstruction>(insn).getLabelIndex() << ":\n";
+        return;
+      } else if (opcode == OP_BRA) {
+        NOT_IMPLEMENTED;
+        return;
+      } else if (opcode == OP_RET) {
+        std::cout << "BE AWARE OF RET: ONLY ONE RET AT THE END OF THE FUNCTION SHOULD BE OUTPUTTED!";
+        o << "return;\n";
+        return;
+      }
+
+      // Extra checks
+#if GBE_DEBUG
+      if (opcode == OP_LOAD)
+        GBE_ASSERT(cast<LoadInstruction>(insn).getValueNum() == 1);
+      if (opcode == OP_STORE)
+        GBE_ASSERT(cast<StoreInstruction>(insn).getValueNum() == 1);
+#endif /* GBE_DEBUG */
+
+      // Regular compute instruction
+      const uint32_t dstNum = insn.getDstNum();
+      const uint32_t srcNum = insn.getSrcNum();
+      o << opcodeStr;
+
+      // Append type when needed
+      if (insn.isMemberOf<UnaryInstruction>() == true)
+       o << "_" << typeStr(cast<UnaryInstruction>(insn).getType());
+      else if (insn.isMemberOf<BinaryInstruction>() == true)
+       o << "_" << typeStr(cast<BinaryInstruction>(insn).getType());
+      else if (insn.isMemberOf<TernaryInstruction>() == true)
+       o << "_" << typeStr(cast<BinaryInstruction>(insn).getType());
+      else if (insn.isMemberOf<CompareInstruction>() == true)
+       o << "_" << typeStr(cast<CompareInstruction>(insn).getType());
+      o << "(";
+
+      // Output both destinations and sources in that order
+      for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {
+        o << "_" << uint32_t(insn.getDst(dstID));
+        if (dstID < dstNum-1 || srcNum > 0) o << ", ";
+      }
+      for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
+        o << "_" << uint32_t(insn.getSrc(srcID));
+        if (srcID < srcNum-1) o << ", ";
+      }
+
+      // Append extra stuff for instructions that need it
+      if (opcode == OP_LOADI) {
+        Immediate imm = cast<LoadImmInstruction>(insn).getImmediate();
+        GBE_ASSERT(imm.type == TYPE_S32 ||
+                   imm.type == TYPE_U32 ||
+                   imm.type == TYPE_FLOAT);
+        o << ", " << imm.data.u32;
+      } else if (opcode == OP_LOAD || opcode == OP_STORE)
+        o << ", base";
+      o << ");\n";
+    });
    }
  
+#undef LOAD_SPECIAL_REG
+
    void SimContext::emitCode(void) {
      SimKernel *simKernel = static_cast<SimKernel*>(this->kernel);
      char srcStr[L_tmpnam+1], libStr[L_tmpnam+1];
@@ -91,15 +227,19 @@ namespace gbe
        << "(gbe_simulator sim, uint32_t tid, scalar_dw _3, scalar_dw _4, scalar_dw _5)\n"
        << "{\n"
        << "const size_t curbe_sz = sim->get_curbe_size(sim);\n"
-      << "const char *curbe = (const char*) sim->get_curbe_address(sim) + curbe_sz * tid;\n";
+      << "const char *curbe = (const char*) sim->get_curbe_address(sim) + curbe_sz * tid;\n"
+      << "char *base = (char*) sim->get_base_address(sim);\n";
      this->emitRegisters();
+    this->emitCurbeLoad();
+    this->emitInstructionStream();
      o << "}\n";
      o << std::endl;
      o.close();
  
      /* Compile the function */
      std::cout << "# source: " << srcName << " library: " << libName << std::endl;
-    std::string compileCmd = "g++ -funroll-loops -shared -msse -msse2 -msse3 -mssse3 -msse4.1 -g -O3 -o ";
+    //std::string compileCmd = "g++ -fPIC -funroll-loops -shared -msse -msse2 -msse3 -mssse3 -msse4.1 -g -O3 -o ";
+    std::string compileCmd = "g++ -fPIC -funroll-loops -shared -msse -msse2 -msse3 -mssse3 -msse4.1 -g -o ";
      compileCmd += libName;
      compileCmd += " ";
      compileCmd += srcName;
diff --git a/backend/src/backend/sim_context.hpp b/backend/src/backend/sim_context.hpp

index 35135c1..9021adb 100644 (file)
--- a/backend/src/backend/sim_context.hpp
+++ b/backend/src/backend/sim_context.hpp
@@ -50,7 +50,9 @@ namespace gbe
      /*! Emit all the register declarations */
      void emitRegisters(void);
      /*! Load the curbe data into the registers */
-    void loadCurbe(void);
+    void emitCurbeLoad(void);
+    /*! Emit the instructions */
+    void emitInstructionStream(void);
      /*! Implements base class */
      virtual Kernel *allocateKernel(void);
      std::ofstream o; //!< Where to output the c++ string
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp

index ee80ccc..6941ac5 100644 (file)
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -739,13 +739,11 @@ namespace ir {
    };
  
    RegisterData Instruction::getDstData(uint32_t ID) const {
-    GBE_ASSERT(this->getParent() != NULL);
-    const Function &fn = this->getParent()->getParent();
+    const Function &fn = this->getFunction();
      return fn.getRegisterData(this->getDst(ID));
    }
    RegisterData Instruction::getSrcData(uint32_t ID) const {
-    GBE_ASSERT(this->getParent() != NULL);
-    const Function &fn = this->getParent()->getParent();
+    const Function &fn = this->getFunction();
      return fn.getRegisterData(this->getSrc(ID));
    }
  
@@ -861,8 +859,7 @@ END_FUNCTION(Instruction, bool)
  #define DECL_INSN(OPCODE, CLASS)                                  \
    case OP_##OPCODE:                                               \
    {                                                               \
-    GBE_ASSERT(this->getParent() != NULL);                        \
-    const Function &fn = this->getParent()->getParent();          \
+    const Function &fn = this->getFunction();                     \
      return reinterpret_cast<const internal::CLASS*>(this)->CALL;  \
    }
  
@@ -882,6 +879,12 @@ END_FUNCTION(Instruction, Register)
  #undef END_FUNCTION
  #undef START_FUNCTION
  
+  const Function &Instruction::getFunction(void) const {
+    const BasicBlock *bb = this->getParent();
+    GBE_ASSERT(bb != NULL);
+    return bb->getParent();
+  }
+
  #define DECL_MEM_FN(CLASS, RET, PROTOTYPE, CALL)                  \
    RET CLASS::PROTOTYPE const {                                    \
      return reinterpret_cast<const internal::CLASS*>(this)->CALL;  \
@@ -900,7 +903,6 @@ DECL_MEM_FN(StoreInstruction, AddressSpace, getAddressSpace(void), getAddressSpa
  DECL_MEM_FN(LoadInstruction, Type, getValueType(void), getValueType())
  DECL_MEM_FN(LoadInstruction, uint32_t, getValueNum(void), getValueNum())
  DECL_MEM_FN(LoadInstruction, AddressSpace, getAddressSpace(void), getAddressSpace())
-DECL_MEM_FN(LoadImmInstruction, Immediate, getImmediate(const Function &fn), getImmediate(fn))
  DECL_MEM_FN(LoadImmInstruction, Type, getType(void), getType())
  DECL_MEM_FN(LabelInstruction, LabelIndex, getLabelIndex(void), getLabelIndex())
  DECL_MEM_FN(BranchInstruction, bool, isPredicated(void), isPredicated())
@@ -908,6 +910,11 @@ DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex())
  
  #undef DECL_MEM_FN
  
+  Immediate LoadImmInstruction::getImmediate(void) const {
+    const Function &fn = this->getFunction();
+    return reinterpret_cast<const internal::LoadImmInstruction*>(this)->getImmediate(fn);
+  }
+
    ///////////////////////////////////////////////////////////////////////////
    // Implements the emission functions
    ///////////////////////////////////////////////////////////////////////////
@@ -1038,11 +1045,8 @@ DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex())
      return insn.convert();
    }
  
-  std::ostream &operator<< (std::ostream &out, const Instruction &insn)
-  {
-    GBE_ASSERT(insn.getParent() != NULL);
-    const BasicBlock *bb = insn.getParent();
-    const Function &fn = bb->getParent();
+  std::ostream &operator<< (std::ostream &out, const Instruction &insn) {
+    const Function &fn = insn.getFunction();
      switch (insn.getOpcode()) {
  #define DECL_INSN(OPCODE, CLASS)                                     \
        case OP_##OPCODE:                                              \
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp

index cbe89b0..3befd02 100644 (file)
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -107,6 +107,8 @@ namespace ir {
      BasicBlock *getParent(void) { return parent; }
      const BasicBlock *getParent(void) const { return parent; }
      void setParent(BasicBlock *block) { this->parent = block; }
+    /*! Get the function from the parent basic block */
+    const Function &getFunction(void) const;
      /*! Check that the instruction is well formed (type properly match,
       *  registers not of bound and so on). If not well formed, provide a reason
       *  in string why
@@ -236,7 +238,7 @@ namespace ir {
    class LoadImmInstruction : public Instruction {
    public:
      /*! Return the value stored in the instruction */
-    Immediate getImmediate(const Function &fn) const;
+    Immediate getImmediate(void) const;
      /*! Return the type of the stored value */
      Type getType(void) const;
      /*! Return true if the given instruction is an instance of this class */
diff --git a/backend/src/sys/debug.cpp b/backend/src/sys/debug.cpp

new file mode 100644 (file)

index 0000000..b892737
--- /dev/null
+++ b/backend/src/sys/debug.cpp
@@ -0,0 +1,66 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+/**
+ * \file debug.cpp
+ * \author Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+#include "debug.hpp"
+#include <cstdio>
+#include <sstream>
+#include <string>
+
+namespace gbe
+{
+#define DECL_DEBUG_VAR(TYPE, NAME) TYPE NAME;
+  #include "debug.hxx"
+#undef DECL_DEBUG_VAR
+} /* namespace gbe */
+
+namespace
+{
+  template <typename VarType>
+  static VarType getValue(const char *str) {
+    VarType value;
+    std::stringstream ss;
+    ss << std::string(str);
+    ss >> value;
+    return value;
+  }
+
+  struct DebugVarInitializer
+  {
+    DebugVarInitializer(void) {
+#define DECL_DEBUG_VAR(TYPE, NAME) gbe::NAME = TYPE(0);
+#include "debug.hxx"
+#undef DECL_DEBUG_VAR
+
+#define DECL_DEBUG_VAR(TYPE, NAME) do { \
+  const char *str = getenv(#NAME); \
+  if (str != NULL) gbe::NAME = getValue<TYPE>(str); \
+} while (0);
+#include "debug.hxx"
+#undef DECL_DEBUG_VAR
+    }
+  };
+
+  static DebugVarInitializer debugVarInitializer;
+} /* namespace */
+
diff --git a/backend/src/sys/debug.hpp b/backend/src/sys/debug.hpp

new file mode 100644 (file)

index 0000000..1abbfca
--- /dev/null
+++ b/backend/src/sys/debug.hpp
@@ -0,0 +1,33 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+/**
+ * \file debug.hpp
+ * \author Benjamin Segovia <benjamin.segovia@intel.com>
+ *
+ * shitloads of debug variables (set on pre-main) that the user can set
+ */
+
+namespace gbe
+{
+#define DECL_DEBUG_VAR(TYPE, NAME) extern TYPE NAME;
+  #include "debug.hxx"
+#undef DECL_DEBUG_VAR
+} /* namespace gbe */
+
diff --git a/backend/src/sys/debug.hxx b/backend/src/sys/debug.hxx

new file mode 100644 (file)

index 0000000..1f9a66a
--- /dev/null
+++ b/backend/src/sys/debug.hxx
@@ -0,0 +1,3 @@
+DECL_DEBUG_VAR(bool, OCL_OUTPUT_GEN_IR)
+DECL_DEBUG_VAR(bool, OCL_OUTPUT_LLVM)
+
author	Benjamin Segovia <segovia.benjamin@gmail.com>
	Fri, 13 Apr 2012 18:41:56 +0000 (18:41 +0000)
committer	Keith Packard <keithp@keithp.com>
	Fri, 10 Aug 2012 23:16:23 +0000 (16:16 -0700)
backend/kernels/stdlib.h		patch \| blob \| history
backend/src/CMakeLists.txt		patch \| blob \| history
backend/src/backend/context.cpp		patch \| blob \| history
backend/src/backend/sim/sim_vector.h		patch \| blob \| history
backend/src/backend/sim/sim_vector_str.cpp		patch \| blob \| history
backend/src/backend/sim_context.cpp		patch \| blob \| history
backend/src/backend/sim_context.hpp		patch \| blob \| history
backend/src/ir/instruction.cpp		patch \| blob \| history
backend/src/ir/instruction.hpp		patch \| blob \| history
backend/src/sys/debug.cpp	[new file with mode: 0644]	patch \| blob
backend/src/sys/debug.hpp	[new file with mode: 0644]	patch \| blob
backend/src/sys/debug.hxx	[new file with mode: 0644]	patch \| blob