From a217fd38d20d8ce8bdb4ac653238c7b885556233 Mon Sep 17 00:00:00 2001 From: Benjamin Segovia Date: Fri, 13 Apr 2012 18:41:56 +0000 Subject: [PATCH] Made the first kernels work with the simulators Added some debug variables --- backend/kernels/stdlib.h | 2 +- backend/src/CMakeLists.txt | 3 + backend/src/backend/context.cpp | 27 +++++- backend/src/backend/sim/sim_vector.h | 55 +++++++---- backend/src/backend/sim/sim_vector_str.cpp | 55 +++++++---- backend/src/backend/sim_context.cpp | 148 ++++++++++++++++++++++++++++- backend/src/backend/sim_context.hpp | 4 +- backend/src/ir/instruction.cpp | 28 +++--- backend/src/ir/instruction.hpp | 4 +- backend/src/sys/debug.cpp | 66 +++++++++++++ backend/src/sys/debug.hpp | 33 +++++++ backend/src/sys/debug.hxx | 3 + 12 files changed, 365 insertions(+), 63 deletions(-) create mode 100644 backend/src/sys/debug.cpp create mode 100644 backend/src/sys/debug.hpp create mode 100644 backend/src/sys/debug.hxx diff --git a/backend/kernels/stdlib.h b/backend/kernels/stdlib.h index 472655a..eaf4b17 100644 --- a/backend/kernels/stdlib.h +++ b/backend/kernels/stdlib.h @@ -43,7 +43,7 @@ DECL_PUBLIC_WORK_ITEM_FN(get_num_groups) #undef DECL_PUBLIC_WORK_ITEM_FN inline unsigned int get_global_id(unsigned int dim) { - return get_local_id(dim) + get_local_size(dim) * get_num_groups(dim); + return get_local_id(dim) + get_local_size(dim) * get_group_id(dim); } __attribute__ ((pure,const,overloadable)) float mad(float a, float b, float c); diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt index 7ee1714..db8beb7 100644 --- a/backend/src/CMakeLists.txt +++ b/backend/src/CMakeLists.txt @@ -42,6 +42,9 @@ else (GBE_USE_BLOB) sys/condition.hpp sys/platform.cpp sys/platform.hpp + sys/debug.cpp + sys/debug.hpp + sys/debug.hxx ir/context.cpp ir/context.hpp ir/profile.cpp diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp index 16b8fad..7f6ac89 100644 --- a/backend/src/backend/context.cpp +++ b/backend/src/backend/context.cpp @@ -26,16 +26,24 @@ #include "ir/unit.hpp" #include "ir/function.hpp" #include "ir/profile.hpp" +#include "ir/liveness.hpp" +#include "ir/value.hpp" #include namespace gbe { Context::Context(const ir::Unit &unit, const std::string &name) : unit(unit), fn(*unit.getFunction(name)), name(name), liveness(NULL), dag(NULL) - { GBE_ASSERT(unit.getPointerSize() == ir::POINTER_32_BITS); + { + GBE_ASSERT(unit.getPointerSize() == ir::POINTER_32_BITS); + this->liveness = GBE_NEW(ir::Liveness, (ir::Function&) fn); + this->dag = GBE_NEW(ir::FunctionDAG, *this->liveness); this->simdWidth = 16; /* XXX environment variable for that to start with */ } - Context::~Context(void) {} + Context::~Context(void) { + GBE_SAFE_DELETE(this->dag); + GBE_SAFE_DELETE(this->liveness); + } Kernel *Context::compileKernel(void) { this->kernel = this->allocateKernel(); @@ -77,7 +85,7 @@ namespace gbe for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { const ir::Register reg = insn.getSrc(srcID); if (fn.isSpecialReg(reg) == false) continue; - + if (specialRegs.contains(reg) == true) continue; INSERT_REG(lsize0, LOCAL_SIZE_X) INSERT_REG(lsize1, LOCAL_SIZE_Y) INSERT_REG(lsize2, LOCAL_SIZE_Z) @@ -90,10 +98,21 @@ namespace gbe INSERT_REG(numgroup0, GROUP_NUM_X) INSERT_REG(numgroup1, GROUP_NUM_Y) INSERT_REG(numgroup2, GROUP_NUM_Z); + specialRegs.insert(reg); } }); + kernel->curbeSize = ALIGN(kernel->curbeSize, 32); + + // Local IDs always go at the end of the curbe + const size_t localIDSize = sizeof(uint32_t) * this->simdWidth; + const PatchInfo lid0(GBE_CURBE_LOCAL_ID_X, 0, kernel->curbeSize+0*localIDSize); + const PatchInfo lid1(GBE_CURBE_LOCAL_ID_Y, 0, kernel->curbeSize+1*localIDSize); + const PatchInfo lid2(GBE_CURBE_LOCAL_ID_Z, 0, kernel->curbeSize+2*localIDSize); + kernel->patches.push_back(lid0); + kernel->patches.push_back(lid1); + kernel->patches.push_back(lid2); - // After this point the vector is immutable. so, Sorting it will make + // After this point the vector is immutable. Sorting it will make // research faster std::sort(kernel->patches.begin(), kernel->patches.end()); } diff --git a/backend/src/backend/sim/sim_vector.h b/backend/src/backend/sim/sim_vector.h index cd9b5e6..3fb8597 100644 --- a/backend/src/backend/sim/sim_vector.h +++ b/backend/src/backend/sim/sim_vector.h @@ -157,6 +157,10 @@ INLINE void NAME(DST_TYPE &dst, const SRC_TYPE &v0, const scalar_dw &v1) {\ template \ INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const SRC_TYPE &v1) {\ NAME(dst, simd_dw(v0), v1);\ +}\ +template \ +INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const scalar_dw &v1) {\ + NAME(dst, simd_dw(v0), simd_dw(v1));\ } VEC_OP(simd_dw, simd_dw, ADD_F, _mm_add_ps, ID, ID, ID); VEC_OP(simd_dw, simd_dw, SUB_F, _mm_sub_ps, ID, ID, ID); @@ -192,6 +196,10 @@ INLINE void NAME(DST_TYPE &dst, const SRC_TYPE &v0, const scalar_dw &v1) {\ template \ INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const SRC_TYPE &v1) {\ NAME(dst, simd_dw(v0), v1);\ +}\ +template \ +INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const scalar_dw &v1) {\ + NAME(dst, simd_dw(v0), simd_dw(v1));\ } VEC_OP(simd_m, simd_dw, GE_S32, _mm_cmplt_epi32, SI2PS, PS2SI, PS2SI); VEC_OP(simd_m, simd_dw, LE_S32, _mm_cmpgt_epi32, SI2PS, PS2SI, PS2SI); @@ -215,6 +223,10 @@ INLINE void NAME(DST_TYPE &dst, const SRC_TYPE &v0, const scalar_dw &v1) {\ template \ INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const SRC_TYPE &v1) {\ NAME(dst, simd_dw(v0), v1);\ +}\ +template \ +INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const scalar_dw &v1) {\ + NAME(dst, simd_dw(v0), simd_dw(v1));\ } VEC_OP(simd_dw, simd_dw, MUL_S32, *, s); VEC_OP(simd_dw, simd_dw, DIV_S32, /, s); @@ -262,24 +274,24 @@ VEC_OP(simd_m, simd_dw, GT_U32, >, u); template INLINE void NE_S32(simd_m &dst, const simd_dw &v0, - const scalar_dw &v1) + const simd_dw &v1) { - NE_S32(dst, v0, simd_dw(v1)); + for (uint32_t i = 0; i < vectorNum; ++i) + dst.m[i] = _mm_xor_ps(alltrue.v, SI2PS(_mm_cmpeq_epi32(PS2SI(v0.m[i]), PS2SI(v1.m[i])))); } template INLINE void NE_S32(simd_m &dst, - const scalar_dw &v0, - const simd_dw &v1) + const simd_dw &v0, + const scalar_dw &v1) { - NE_S32(dst, simd_dw(v0), v1); + NE_S32(dst, v0, simd_dw(v1)); } template INLINE void NE_S32(simd_m &dst, - const simd_dw &v0, + const scalar_dw &v0, const simd_dw &v1) { - for (uint32_t i = 0; i < vectorNum; ++i) - dst.m[i] = _mm_xor_ps(alltrue.v, SI2PS(_mm_cmpeq_epi32(PS2SI(v0.m[i]), PS2SI(v1.m[i])))); + NE_S32(dst, simd_dw(v0), v1); } /* Load from contiguous double words */ @@ -298,25 +310,28 @@ INLINE void STORE(const simd_dw &src, char *ptr) { /* Load immediates */ template -INLINE void LOADI(simd_dw &dst, float f) { +INLINE void LOADI(simd_dw &dst, uint32_t u) { + union { uint32_t u; float f; } cast; + cast.u = u; for (uint32_t i = 0; i < vectorNum; ++i) - dst.m[i] = _mm_load1_ps(&f); + dst.m[i] = _mm_load1_ps(&cast.f); } +#include /* Scatter */ template -INLINE void SCATTER(const simd_dw &value, - const simd_dw &offset, +INLINE void SCATTER(const simd_dw &offset, + const simd_dw &value, char *base_address) { for (uint32_t i = 0; i < vectorNum; ++i) { const int v0 = _mm_extract_epi32(PS2SI(value.m[i]), 0); const int v1 = _mm_extract_epi32(PS2SI(value.m[i]), 1); const int v2 = _mm_extract_epi32(PS2SI(value.m[i]), 2); const int v3 = _mm_extract_epi32(PS2SI(value.m[i]), 3); - const int o0 = _mm_extract_epi32(offset.m[i], 0); - const int o1 = _mm_extract_epi32(offset.m[i], 1); - const int o2 = _mm_extract_epi32(offset.m[i], 2); - const int o3 = _mm_extract_epi32(offset.m[i], 3); + const int o0 = _mm_extract_epi32(PS2SI(offset.m[i]), 0); + const int o1 = _mm_extract_epi32(PS2SI(offset.m[i]), 1); + const int o2 = _mm_extract_epi32(PS2SI(offset.m[i]), 2); + const int o3 = _mm_extract_epi32(PS2SI(offset.m[i]), 3); *(int*)(base_address + o0) = v0; *(int*)(base_address + o1) = v1; *(int*)(base_address + o2) = v2; @@ -324,14 +339,14 @@ INLINE void SCATTER(const simd_dw &value, } } template -INLINE void SCATTER(const scalar_dw &value, - const simd_dw &offset, +INLINE void SCATTER(const simd_dw &offset, + const scalar_dw &value, char *base_address) { SCATTER(simd_dw(value), offset, base_address); } template -INLINE void SCATTER(const simd_dw &value, - const scalar_dw &offset, +INLINE void SCATTER(const scalar_dw &offset, + const simd_dw &value, char *base_address) { SCATTER(value, simd_dw(offset), base_address); } diff --git a/backend/src/backend/sim/sim_vector_str.cpp b/backend/src/backend/sim/sim_vector_str.cpp index 2fbb7ed..ebe3607 100644 --- a/backend/src/backend/sim/sim_vector_str.cpp +++ b/backend/src/backend/sim/sim_vector_str.cpp @@ -183,6 +183,10 @@ std::string sim_vector_str = "template \\\n" "INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const SRC_TYPE &v1) {\\\n" " NAME(dst, simd_dw(v0), v1);\\\n" +"}\\\n" +"template \\\n" +"INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const scalar_dw &v1) {\\\n" +" NAME(dst, simd_dw(v0), simd_dw(v1));\\\n" "}\n" "VEC_OP(simd_dw, simd_dw, ADD_F, _mm_add_ps, ID, ID, ID);\n" "VEC_OP(simd_dw, simd_dw, SUB_F, _mm_sub_ps, ID, ID, ID);\n" @@ -218,6 +222,10 @@ std::string sim_vector_str = "template \\\n" "INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const SRC_TYPE &v1) {\\\n" " NAME(dst, simd_dw(v0), v1);\\\n" +"}\\\n" +"template \\\n" +"INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const scalar_dw &v1) {\\\n" +" NAME(dst, simd_dw(v0), simd_dw(v1));\\\n" "}\n" "VEC_OP(simd_m, simd_dw, GE_S32, _mm_cmplt_epi32, SI2PS, PS2SI, PS2SI);\n" "VEC_OP(simd_m, simd_dw, LE_S32, _mm_cmpgt_epi32, SI2PS, PS2SI, PS2SI);\n" @@ -241,6 +249,10 @@ std::string sim_vector_str = "template \\\n" "INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const SRC_TYPE &v1) {\\\n" " NAME(dst, simd_dw(v0), v1);\\\n" +"}\\\n" +"template \\\n" +"INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const scalar_dw &v1) {\\\n" +" NAME(dst, simd_dw(v0), simd_dw(v1));\\\n" "}\n" "VEC_OP(simd_dw, simd_dw, MUL_S32, *, s);\n" "VEC_OP(simd_dw, simd_dw, DIV_S32, /, s);\n" @@ -288,24 +300,24 @@ std::string sim_vector_str = "template \n" "INLINE void NE_S32(simd_m &dst,\n" " const simd_dw &v0,\n" -" const scalar_dw &v1)\n" +" const simd_dw &v1)\n" "{\n" -" NE_S32(dst, v0, simd_dw(v1));\n" +" for (uint32_t i = 0; i < vectorNum; ++i)\n" +" dst.m[i] = _mm_xor_ps(alltrue.v, SI2PS(_mm_cmpeq_epi32(PS2SI(v0.m[i]), PS2SI(v1.m[i]))));\n" "}\n" "template \n" "INLINE void NE_S32(simd_m &dst,\n" -" const scalar_dw &v0,\n" -" const simd_dw &v1)\n" +" const simd_dw &v0,\n" +" const scalar_dw &v1)\n" "{\n" -" NE_S32(dst, simd_dw(v0), v1);\n" +" NE_S32(dst, v0, simd_dw(v1));\n" "}\n" "template \n" "INLINE void NE_S32(simd_m &dst,\n" -" const simd_dw &v0,\n" +" const scalar_dw &v0,\n" " const simd_dw &v1)\n" "{\n" -" for (uint32_t i = 0; i < vectorNum; ++i)\n" -" dst.m[i] = _mm_xor_ps(alltrue.v, SI2PS(_mm_cmpeq_epi32(PS2SI(v0.m[i]), PS2SI(v1.m[i]))));\n" +" NE_S32(dst, simd_dw(v0), v1);\n" "}\n" "\n" "/* Load from contiguous double words */\n" @@ -324,25 +336,28 @@ std::string sim_vector_str = "\n" "/* Load immediates */\n" "template \n" -"INLINE void LOADI(simd_dw &dst, float f) {\n" +"INLINE void LOADI(simd_dw &dst, uint32_t u) {\n" +" union { uint32_t u; float f; } cast;\n" +" cast.u = u;\n" " for (uint32_t i = 0; i < vectorNum; ++i)\n" -" dst.m[i] = _mm_load1_ps(&f);\n" +" dst.m[i] = _mm_load1_ps(&cast.f);\n" "}\n" "\n" +"#include \n" "/* Scatter */\n" "template \n" -"INLINE void SCATTER(const simd_dw &value,\n" -" const simd_dw &offset,\n" +"INLINE void SCATTER(const simd_dw &offset,\n" +" const simd_dw &value,\n" " char *base_address) {\n" " for (uint32_t i = 0; i < vectorNum; ++i) {\n" " const int v0 = _mm_extract_epi32(PS2SI(value.m[i]), 0);\n" " const int v1 = _mm_extract_epi32(PS2SI(value.m[i]), 1);\n" " const int v2 = _mm_extract_epi32(PS2SI(value.m[i]), 2);\n" " const int v3 = _mm_extract_epi32(PS2SI(value.m[i]), 3);\n" -" const int o0 = _mm_extract_epi32(offset.m[i], 0);\n" -" const int o1 = _mm_extract_epi32(offset.m[i], 1);\n" -" const int o2 = _mm_extract_epi32(offset.m[i], 2);\n" -" const int o3 = _mm_extract_epi32(offset.m[i], 3);\n" +" const int o0 = _mm_extract_epi32(PS2SI(offset.m[i]), 0);\n" +" const int o1 = _mm_extract_epi32(PS2SI(offset.m[i]), 1);\n" +" const int o2 = _mm_extract_epi32(PS2SI(offset.m[i]), 2);\n" +" const int o3 = _mm_extract_epi32(PS2SI(offset.m[i]), 3);\n" " *(int*)(base_address + o0) = v0;\n" " *(int*)(base_address + o1) = v1;\n" " *(int*)(base_address + o2) = v2;\n" @@ -350,14 +365,14 @@ std::string sim_vector_str = " }\n" "}\n" "template \n" -"INLINE void SCATTER(const scalar_dw &value,\n" -" const simd_dw &offset,\n" +"INLINE void SCATTER(const simd_dw &offset,\n" +" const scalar_dw &value,\n" " char *base_address) {\n" " SCATTER(simd_dw(value), offset, base_address);\n" "}\n" "template \n" -"INLINE void SCATTER(const simd_dw &value,\n" -" const scalar_dw &offset,\n" +"INLINE void SCATTER(const scalar_dw &offset,\n" +" const simd_dw &value,\n" " char *base_address) {\n" " SCATTER(value, simd_dw(offset), base_address);\n" "}\n" diff --git a/backend/src/backend/sim_context.cpp b/backend/src/backend/sim_context.cpp index 49b5a85..fa2ea1f 100644 --- a/backend/src/backend/sim_context.cpp +++ b/backend/src/backend/sim_context.cpp @@ -44,12 +44,16 @@ namespace gbe void SimContext::emitRegisters(void) { GBE_ASSERT(fn.getProfile() == ir::PROFILE_OCL); const uint32_t regNum = fn.regNum(); + bool lid0 = false, lid1 = false, lid2 = false; // for local id registers for (uint32_t regID = 0; regID < regNum; ++regID) { const ir::Register reg(regID); if (reg == ir::ocl::groupid0 || reg == ir::ocl::groupid1 || reg == ir::ocl::groupid2) continue; + if (reg == ir::ocl::lid0) lid0 = true; + if (reg == ir::ocl::lid1) lid1 = true; + if (reg == ir::ocl::lid2) lid2 = true; const ir::RegisterData regData = fn.getRegisterData(reg); switch (regData.family) { case ir::FAMILY_BOOL: @@ -66,16 +70,148 @@ namespace gbe break; } } + + // Always declare local IDs + if (lid0 == false) o << "scalar_dw _" << uint32_t(ir::ocl::lid0) << ";\n"; + if (lid1 == false) o << "scalar_dw _" << uint32_t(ir::ocl::lid1) << ";\n"; + if (lid2 == false) o << "scalar_dw _" << uint32_t(ir::ocl::lid2) << ";\n"; } - void SimContext::loadCurbe(void) { +#define LOAD_SPECIAL_REG(CURBE, REG) do { \ + const int32_t offset = kernel->getCurbeOffset(CURBE, 0); \ + if (offset >= 0) \ + o << "LOAD(_" << uint32_t(REG) << ", curbe + " << offset << ");\n"; \ + } while (0) + + void SimContext::emitCurbeLoad(void) { // Right now curbe is only made of input argument stuff const uint32_t inputNum = fn.inputNum(); for (uint32_t inputID = 0; inputID < inputNum; ++inputID) { - + const ir::FunctionInput &input = fn.getInput(inputID); + const ir::Register reg = input.reg; + const int32_t offset = kernel->getCurbeOffset(GBE_CURBE_KERNEL_ARGUMENT, inputID); + // XXX add support for these items + GBE_ASSERT (input.type != ir::FunctionInput::VALUE && + input.type != ir::FunctionInput::STRUCTURE && + input.type != ir::FunctionInput::IMAGE && + input.type != ir::FunctionInput::LOCAL_POINTER); + GBE_ASSERT(offset >= 0); + o << "LOAD(_" << uint32_t(reg) << ", curbe + " << offset << ");\n"; } + + // We must now load the special registers needed by the kernel + LOAD_SPECIAL_REG(GBE_CURBE_LOCAL_ID_X, ir::ocl::lid0); + LOAD_SPECIAL_REG(GBE_CURBE_LOCAL_ID_Y, ir::ocl::lid1); + LOAD_SPECIAL_REG(GBE_CURBE_LOCAL_ID_Z, ir::ocl::lid2); + LOAD_SPECIAL_REG(GBE_CURBE_LOCAL_SIZE_X, ir::ocl::lsize0); + LOAD_SPECIAL_REG(GBE_CURBE_LOCAL_SIZE_Y, ir::ocl::lsize1); + LOAD_SPECIAL_REG(GBE_CURBE_LOCAL_SIZE_Z, ir::ocl::lsize2); + LOAD_SPECIAL_REG(GBE_CURBE_GLOBAL_SIZE_X, ir::ocl::gsize0); + LOAD_SPECIAL_REG(GBE_CURBE_GLOBAL_SIZE_Y, ir::ocl::gsize1); + LOAD_SPECIAL_REG(GBE_CURBE_GLOBAL_SIZE_Z, ir::ocl::gsize2); + LOAD_SPECIAL_REG(GBE_CURBE_GLOBAL_OFFSET_X, ir::ocl::goffset0); + LOAD_SPECIAL_REG(GBE_CURBE_GLOBAL_OFFSET_Y, ir::ocl::goffset1); + LOAD_SPECIAL_REG(GBE_CURBE_GLOBAL_OFFSET_Z, ir::ocl::goffset2); + LOAD_SPECIAL_REG(GBE_CURBE_GROUP_NUM_X, ir::ocl::numgroup0); + LOAD_SPECIAL_REG(GBE_CURBE_GROUP_NUM_Y, ir::ocl::numgroup1); + LOAD_SPECIAL_REG(GBE_CURBE_GROUP_NUM_Z, ir::ocl::numgroup2); + } + + static const char *typeStr(const ir::Type &type) { + switch (type) { + case ir::TYPE_BOOL: return "M"; + case ir::TYPE_S8: return "S8"; + case ir::TYPE_S16: return "S16"; + case ir::TYPE_S32: return "S32"; + case ir::TYPE_S64: return "S64"; + case ir::TYPE_U8: return "U8"; + case ir::TYPE_U16: return "U16"; + case ir::TYPE_U32: return "U32"; + case ir::TYPE_U64: return "U64"; + case ir::TYPE_HALF: return "HALF"; + case ir::TYPE_FLOAT: return "F"; + case ir::TYPE_DOUBLE: return "D"; + default: NOT_IMPLEMENTED; return NULL; + }; + } + + void SimContext::emitInstructionStream(void) { + using namespace ir; + fn.foreachInstruction([&](const Instruction &insn) { + const char *opcodeStr = NULL; + const Opcode opcode = insn.getOpcode(); +#define DECL_INSN(OPCODE, FAMILY) \ + case OP_##OPCODE: \ + if (opcode == OP_LOAD) opcodeStr = "GATHER"; \ + else if (opcode == OP_STORE) opcodeStr = "SCATTER"; \ + else opcodeStr = #OPCODE; \ + break; + switch (opcode) { + #include "ir/instruction.hxx" + default: NOT_IMPLEMENTED; +#undef DECL_INSN + } + if (opcode == OP_LABEL) { + o << "label" << cast(insn).getLabelIndex() << ":\n"; + return; + } else if (opcode == OP_BRA) { + NOT_IMPLEMENTED; + return; + } else if (opcode == OP_RET) { + std::cout << "BE AWARE OF RET: ONLY ONE RET AT THE END OF THE FUNCTION SHOULD BE OUTPUTTED!"; + o << "return;\n"; + return; + } + + // Extra checks +#if GBE_DEBUG + if (opcode == OP_LOAD) + GBE_ASSERT(cast(insn).getValueNum() == 1); + if (opcode == OP_STORE) + GBE_ASSERT(cast(insn).getValueNum() == 1); +#endif /* GBE_DEBUG */ + + // Regular compute instruction + const uint32_t dstNum = insn.getDstNum(); + const uint32_t srcNum = insn.getSrcNum(); + o << opcodeStr; + + // Append type when needed + if (insn.isMemberOf() == true) + o << "_" << typeStr(cast(insn).getType()); + else if (insn.isMemberOf() == true) + o << "_" << typeStr(cast(insn).getType()); + else if (insn.isMemberOf() == true) + o << "_" << typeStr(cast(insn).getType()); + else if (insn.isMemberOf() == true) + o << "_" << typeStr(cast(insn).getType()); + o << "("; + + // Output both destinations and sources in that order + for (uint32_t dstID = 0; dstID < dstNum; ++dstID) { + o << "_" << uint32_t(insn.getDst(dstID)); + if (dstID < dstNum-1 || srcNum > 0) o << ", "; + } + for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { + o << "_" << uint32_t(insn.getSrc(srcID)); + if (srcID < srcNum-1) o << ", "; + } + + // Append extra stuff for instructions that need it + if (opcode == OP_LOADI) { + Immediate imm = cast(insn).getImmediate(); + GBE_ASSERT(imm.type == TYPE_S32 || + imm.type == TYPE_U32 || + imm.type == TYPE_FLOAT); + o << ", " << imm.data.u32; + } else if (opcode == OP_LOAD || opcode == OP_STORE) + o << ", base"; + o << ");\n"; + }); } +#undef LOAD_SPECIAL_REG + void SimContext::emitCode(void) { SimKernel *simKernel = static_cast(this->kernel); char srcStr[L_tmpnam+1], libStr[L_tmpnam+1]; @@ -91,15 +227,19 @@ namespace gbe << "(gbe_simulator sim, uint32_t tid, scalar_dw _3, scalar_dw _4, scalar_dw _5)\n" << "{\n" << "const size_t curbe_sz = sim->get_curbe_size(sim);\n" - << "const char *curbe = (const char*) sim->get_curbe_address(sim) + curbe_sz * tid;\n"; + << "const char *curbe = (const char*) sim->get_curbe_address(sim) + curbe_sz * tid;\n" + << "char *base = (char*) sim->get_base_address(sim);\n"; this->emitRegisters(); + this->emitCurbeLoad(); + this->emitInstructionStream(); o << "}\n"; o << std::endl; o.close(); /* Compile the function */ std::cout << "# source: " << srcName << " library: " << libName << std::endl; - std::string compileCmd = "g++ -funroll-loops -shared -msse -msse2 -msse3 -mssse3 -msse4.1 -g -O3 -o "; + //std::string compileCmd = "g++ -fPIC -funroll-loops -shared -msse -msse2 -msse3 -mssse3 -msse4.1 -g -O3 -o "; + std::string compileCmd = "g++ -fPIC -funroll-loops -shared -msse -msse2 -msse3 -mssse3 -msse4.1 -g -o "; compileCmd += libName; compileCmd += " "; compileCmd += srcName; diff --git a/backend/src/backend/sim_context.hpp b/backend/src/backend/sim_context.hpp index 35135c1..9021adb 100644 --- a/backend/src/backend/sim_context.hpp +++ b/backend/src/backend/sim_context.hpp @@ -50,7 +50,9 @@ namespace gbe /*! Emit all the register declarations */ void emitRegisters(void); /*! Load the curbe data into the registers */ - void loadCurbe(void); + void emitCurbeLoad(void); + /*! Emit the instructions */ + void emitInstructionStream(void); /*! Implements base class */ virtual Kernel *allocateKernel(void); std::ofstream o; //!< Where to output the c++ string diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index ee80ccc..6941ac5 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -739,13 +739,11 @@ namespace ir { }; RegisterData Instruction::getDstData(uint32_t ID) const { - GBE_ASSERT(this->getParent() != NULL); - const Function &fn = this->getParent()->getParent(); + const Function &fn = this->getFunction(); return fn.getRegisterData(this->getDst(ID)); } RegisterData Instruction::getSrcData(uint32_t ID) const { - GBE_ASSERT(this->getParent() != NULL); - const Function &fn = this->getParent()->getParent(); + const Function &fn = this->getFunction(); return fn.getRegisterData(this->getSrc(ID)); } @@ -861,8 +859,7 @@ END_FUNCTION(Instruction, bool) #define DECL_INSN(OPCODE, CLASS) \ case OP_##OPCODE: \ { \ - GBE_ASSERT(this->getParent() != NULL); \ - const Function &fn = this->getParent()->getParent(); \ + const Function &fn = this->getFunction(); \ return reinterpret_cast(this)->CALL; \ } @@ -882,6 +879,12 @@ END_FUNCTION(Instruction, Register) #undef END_FUNCTION #undef START_FUNCTION + const Function &Instruction::getFunction(void) const { + const BasicBlock *bb = this->getParent(); + GBE_ASSERT(bb != NULL); + return bb->getParent(); + } + #define DECL_MEM_FN(CLASS, RET, PROTOTYPE, CALL) \ RET CLASS::PROTOTYPE const { \ return reinterpret_cast(this)->CALL; \ @@ -900,7 +903,6 @@ DECL_MEM_FN(StoreInstruction, AddressSpace, getAddressSpace(void), getAddressSpa DECL_MEM_FN(LoadInstruction, Type, getValueType(void), getValueType()) DECL_MEM_FN(LoadInstruction, uint32_t, getValueNum(void), getValueNum()) DECL_MEM_FN(LoadInstruction, AddressSpace, getAddressSpace(void), getAddressSpace()) -DECL_MEM_FN(LoadImmInstruction, Immediate, getImmediate(const Function &fn), getImmediate(fn)) DECL_MEM_FN(LoadImmInstruction, Type, getType(void), getType()) DECL_MEM_FN(LabelInstruction, LabelIndex, getLabelIndex(void), getLabelIndex()) DECL_MEM_FN(BranchInstruction, bool, isPredicated(void), isPredicated()) @@ -908,6 +910,11 @@ DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex()) #undef DECL_MEM_FN + Immediate LoadImmInstruction::getImmediate(void) const { + const Function &fn = this->getFunction(); + return reinterpret_cast(this)->getImmediate(fn); + } + /////////////////////////////////////////////////////////////////////////// // Implements the emission functions /////////////////////////////////////////////////////////////////////////// @@ -1038,11 +1045,8 @@ DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex()) return insn.convert(); } - std::ostream &operator<< (std::ostream &out, const Instruction &insn) - { - GBE_ASSERT(insn.getParent() != NULL); - const BasicBlock *bb = insn.getParent(); - const Function &fn = bb->getParent(); + std::ostream &operator<< (std::ostream &out, const Instruction &insn) { + const Function &fn = insn.getFunction(); switch (insn.getOpcode()) { #define DECL_INSN(OPCODE, CLASS) \ case OP_##OPCODE: \ diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index cbe89b0..3befd02 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -107,6 +107,8 @@ namespace ir { BasicBlock *getParent(void) { return parent; } const BasicBlock *getParent(void) const { return parent; } void setParent(BasicBlock *block) { this->parent = block; } + /*! Get the function from the parent basic block */ + const Function &getFunction(void) const; /*! Check that the instruction is well formed (type properly match, * registers not of bound and so on). If not well formed, provide a reason * in string why @@ -236,7 +238,7 @@ namespace ir { class LoadImmInstruction : public Instruction { public: /*! Return the value stored in the instruction */ - Immediate getImmediate(const Function &fn) const; + Immediate getImmediate(void) const; /*! Return the type of the stored value */ Type getType(void) const; /*! Return true if the given instruction is an instance of this class */ diff --git a/backend/src/sys/debug.cpp b/backend/src/sys/debug.cpp new file mode 100644 index 0000000..b892737 --- /dev/null +++ b/backend/src/sys/debug.cpp @@ -0,0 +1,66 @@ +/* + * Copyright © 2012 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see . + * + * Author: Benjamin Segovia + */ + +/** + * \file debug.cpp + * \author Benjamin Segovia + */ + +#include "debug.hpp" +#include +#include +#include + +namespace gbe +{ +#define DECL_DEBUG_VAR(TYPE, NAME) TYPE NAME; + #include "debug.hxx" +#undef DECL_DEBUG_VAR +} /* namespace gbe */ + +namespace +{ + template + static VarType getValue(const char *str) { + VarType value; + std::stringstream ss; + ss << std::string(str); + ss >> value; + return value; + } + + struct DebugVarInitializer + { + DebugVarInitializer(void) { +#define DECL_DEBUG_VAR(TYPE, NAME) gbe::NAME = TYPE(0); +#include "debug.hxx" +#undef DECL_DEBUG_VAR + +#define DECL_DEBUG_VAR(TYPE, NAME) do { \ + const char *str = getenv(#NAME); \ + if (str != NULL) gbe::NAME = getValue(str); \ +} while (0); +#include "debug.hxx" +#undef DECL_DEBUG_VAR + } + }; + + static DebugVarInitializer debugVarInitializer; +} /* namespace */ + diff --git a/backend/src/sys/debug.hpp b/backend/src/sys/debug.hpp new file mode 100644 index 0000000..1abbfca --- /dev/null +++ b/backend/src/sys/debug.hpp @@ -0,0 +1,33 @@ +/* + * Copyright © 2012 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see . + * + * Author: Benjamin Segovia + */ + +/** + * \file debug.hpp + * \author Benjamin Segovia + * + * shitloads of debug variables (set on pre-main) that the user can set + */ + +namespace gbe +{ +#define DECL_DEBUG_VAR(TYPE, NAME) extern TYPE NAME; + #include "debug.hxx" +#undef DECL_DEBUG_VAR +} /* namespace gbe */ + diff --git a/backend/src/sys/debug.hxx b/backend/src/sys/debug.hxx new file mode 100644 index 0000000..1f9a66a --- /dev/null +++ b/backend/src/sys/debug.hxx @@ -0,0 +1,3 @@ +DECL_DEBUG_VAR(bool, OCL_OUTPUT_GEN_IR) +DECL_DEBUG_VAR(bool, OCL_OUTPUT_LLVM) + -- 2.7.4