#undef DECL_PUBLIC_WORK_ITEM_FN
inline unsigned int get_global_id(unsigned int dim) {
- return get_local_id(dim) + get_local_size(dim) * get_num_groups(dim);
+ return get_local_id(dim) + get_local_size(dim) * get_group_id(dim);
}
__attribute__ ((pure,const,overloadable)) float mad(float a, float b, float c);
sys/condition.hpp
sys/platform.cpp
sys/platform.hpp
+ sys/debug.cpp
+ sys/debug.hpp
+ sys/debug.hxx
ir/context.cpp
ir/context.hpp
ir/profile.cpp
#include "ir/unit.hpp"
#include "ir/function.hpp"
#include "ir/profile.hpp"
+#include "ir/liveness.hpp"
+#include "ir/value.hpp"
#include <algorithm>
namespace gbe
{
Context::Context(const ir::Unit &unit, const std::string &name) :
unit(unit), fn(*unit.getFunction(name)), name(name), liveness(NULL), dag(NULL)
- { GBE_ASSERT(unit.getPointerSize() == ir::POINTER_32_BITS);
+ {
+ GBE_ASSERT(unit.getPointerSize() == ir::POINTER_32_BITS);
+ this->liveness = GBE_NEW(ir::Liveness, (ir::Function&) fn);
+ this->dag = GBE_NEW(ir::FunctionDAG, *this->liveness);
this->simdWidth = 16; /* XXX environment variable for that to start with */
}
- Context::~Context(void) {}
+ Context::~Context(void) {
+ GBE_SAFE_DELETE(this->dag);
+ GBE_SAFE_DELETE(this->liveness);
+ }
Kernel *Context::compileKernel(void) {
this->kernel = this->allocateKernel();
for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
const ir::Register reg = insn.getSrc(srcID);
if (fn.isSpecialReg(reg) == false) continue;
-
+ if (specialRegs.contains(reg) == true) continue;
INSERT_REG(lsize0, LOCAL_SIZE_X)
INSERT_REG(lsize1, LOCAL_SIZE_Y)
INSERT_REG(lsize2, LOCAL_SIZE_Z)
INSERT_REG(numgroup0, GROUP_NUM_X)
INSERT_REG(numgroup1, GROUP_NUM_Y)
INSERT_REG(numgroup2, GROUP_NUM_Z);
+ specialRegs.insert(reg);
}
});
+ kernel->curbeSize = ALIGN(kernel->curbeSize, 32);
+
+ // Local IDs always go at the end of the curbe
+ const size_t localIDSize = sizeof(uint32_t) * this->simdWidth;
+ const PatchInfo lid0(GBE_CURBE_LOCAL_ID_X, 0, kernel->curbeSize+0*localIDSize);
+ const PatchInfo lid1(GBE_CURBE_LOCAL_ID_Y, 0, kernel->curbeSize+1*localIDSize);
+ const PatchInfo lid2(GBE_CURBE_LOCAL_ID_Z, 0, kernel->curbeSize+2*localIDSize);
+ kernel->patches.push_back(lid0);
+ kernel->patches.push_back(lid1);
+ kernel->patches.push_back(lid2);
- // After this point the vector is immutable. so, Sorting it will make
+ // After this point the vector is immutable. Sorting it will make
// research faster
std::sort(kernel->patches.begin(), kernel->patches.end());
}
template <uint32_t vectorNum>\
INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const SRC_TYPE &v1) {\
NAME(dst, simd_dw<vectorNum>(v0), v1);\
+}\
+template <uint32_t vectorNum>\
+INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const scalar_dw &v1) {\
+ NAME(dst, simd_dw<vectorNum>(v0), simd_dw<vectorNum>(v1));\
}
VEC_OP(simd_dw<vectorNum>, simd_dw<vectorNum>, ADD_F, _mm_add_ps, ID, ID, ID);
VEC_OP(simd_dw<vectorNum>, simd_dw<vectorNum>, SUB_F, _mm_sub_ps, ID, ID, ID);
template <uint32_t vectorNum>\
INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const SRC_TYPE &v1) {\
NAME(dst, simd_dw<vectorNum>(v0), v1);\
+}\
+template <uint32_t vectorNum>\
+INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const scalar_dw &v1) {\
+ NAME(dst, simd_dw<vectorNum>(v0), simd_dw<vectorNum>(v1));\
}
VEC_OP(simd_m<vectorNum>, simd_dw<vectorNum>, GE_S32, _mm_cmplt_epi32, SI2PS, PS2SI, PS2SI);
VEC_OP(simd_m<vectorNum>, simd_dw<vectorNum>, LE_S32, _mm_cmpgt_epi32, SI2PS, PS2SI, PS2SI);
template <uint32_t vectorNum>\
INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const SRC_TYPE &v1) {\
NAME(dst, simd_dw<vectorNum>(v0), v1);\
+}\
+template <uint32_t vectorNum>\
+INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const scalar_dw &v1) {\
+ NAME(dst, simd_dw<vectorNum>(v0), simd_dw<vectorNum>(v1));\
}
VEC_OP(simd_dw<vectorNum>, simd_dw<vectorNum>, MUL_S32, *, s);
VEC_OP(simd_dw<vectorNum>, simd_dw<vectorNum>, DIV_S32, /, s);
template <uint32_t vectorNum>
INLINE void NE_S32(simd_m<vectorNum> &dst,
const simd_dw<vectorNum> &v0,
- const scalar_dw &v1)
+ const simd_dw<vectorNum> &v1)
{
- NE_S32(dst, v0, simd_dw<vectorNum>(v1));
+ for (uint32_t i = 0; i < vectorNum; ++i)
+ dst.m[i] = _mm_xor_ps(alltrue.v, SI2PS(_mm_cmpeq_epi32(PS2SI(v0.m[i]), PS2SI(v1.m[i]))));
}
template <uint32_t vectorNum>
INLINE void NE_S32(simd_m<vectorNum> &dst,
- const scalar_dw &v0,
- const simd_dw<vectorNum> &v1)
+ const simd_dw<vectorNum> &v0,
+ const scalar_dw &v1)
{
- NE_S32(dst, simd_dw<vectorNum>(v0), v1);
+ NE_S32(dst, v0, simd_dw<vectorNum>(v1));
}
template <uint32_t vectorNum>
INLINE void NE_S32(simd_m<vectorNum> &dst,
- const simd_dw<vectorNum> &v0,
+ const scalar_dw &v0,
const simd_dw<vectorNum> &v1)
{
- for (uint32_t i = 0; i < vectorNum; ++i)
- dst.m[i] = _mm_xor_ps(alltrue.v, SI2PS(_mm_cmpeq_epi32(PS2SI(v0.m[i]), PS2SI(v1.m[i]))));
+ NE_S32(dst, simd_dw<vectorNum>(v0), v1);
}
/* Load from contiguous double words */
/* Load immediates */
template <uint32_t vectorNum>
-INLINE void LOADI(simd_dw<vectorNum> &dst, float f) {
+INLINE void LOADI(simd_dw<vectorNum> &dst, uint32_t u) {
+ union { uint32_t u; float f; } cast;
+ cast.u = u;
for (uint32_t i = 0; i < vectorNum; ++i)
- dst.m[i] = _mm_load1_ps(&f);
+ dst.m[i] = _mm_load1_ps(&cast.f);
}
+#include <cstdio>
/* Scatter */
template <uint32_t vectorNum>
-INLINE void SCATTER(const simd_dw<vectorNum> &value,
- const simd_dw<vectorNum> &offset,
+INLINE void SCATTER(const simd_dw<vectorNum> &offset,
+ const simd_dw<vectorNum> &value,
char *base_address) {
for (uint32_t i = 0; i < vectorNum; ++i) {
const int v0 = _mm_extract_epi32(PS2SI(value.m[i]), 0);
const int v1 = _mm_extract_epi32(PS2SI(value.m[i]), 1);
const int v2 = _mm_extract_epi32(PS2SI(value.m[i]), 2);
const int v3 = _mm_extract_epi32(PS2SI(value.m[i]), 3);
- const int o0 = _mm_extract_epi32(offset.m[i], 0);
- const int o1 = _mm_extract_epi32(offset.m[i], 1);
- const int o2 = _mm_extract_epi32(offset.m[i], 2);
- const int o3 = _mm_extract_epi32(offset.m[i], 3);
+ const int o0 = _mm_extract_epi32(PS2SI(offset.m[i]), 0);
+ const int o1 = _mm_extract_epi32(PS2SI(offset.m[i]), 1);
+ const int o2 = _mm_extract_epi32(PS2SI(offset.m[i]), 2);
+ const int o3 = _mm_extract_epi32(PS2SI(offset.m[i]), 3);
*(int*)(base_address + o0) = v0;
*(int*)(base_address + o1) = v1;
*(int*)(base_address + o2) = v2;
}
}
template <uint32_t vectorNum>
-INLINE void SCATTER(const scalar_dw &value,
- const simd_dw<vectorNum> &offset,
+INLINE void SCATTER(const simd_dw<vectorNum> &offset,
+ const scalar_dw &value,
char *base_address) {
SCATTER(simd_dw<vectorNum>(value), offset, base_address);
}
template <uint32_t vectorNum>
-INLINE void SCATTER(const simd_dw<vectorNum> &value,
- const scalar_dw &offset,
+INLINE void SCATTER(const scalar_dw &offset,
+ const simd_dw<vectorNum> &value,
char *base_address) {
SCATTER(value, simd_dw<vectorNum>(offset), base_address);
}
"template <uint32_t vectorNum>\\\n"
"INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const SRC_TYPE &v1) {\\\n"
" NAME(dst, simd_dw<vectorNum>(v0), v1);\\\n"
+"}\\\n"
+"template <uint32_t vectorNum>\\\n"
+"INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const scalar_dw &v1) {\\\n"
+" NAME(dst, simd_dw<vectorNum>(v0), simd_dw<vectorNum>(v1));\\\n"
"}\n"
"VEC_OP(simd_dw<vectorNum>, simd_dw<vectorNum>, ADD_F, _mm_add_ps, ID, ID, ID);\n"
"VEC_OP(simd_dw<vectorNum>, simd_dw<vectorNum>, SUB_F, _mm_sub_ps, ID, ID, ID);\n"
"template <uint32_t vectorNum>\\\n"
"INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const SRC_TYPE &v1) {\\\n"
" NAME(dst, simd_dw<vectorNum>(v0), v1);\\\n"
+"}\\\n"
+"template <uint32_t vectorNum>\\\n"
+"INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const scalar_dw &v1) {\\\n"
+" NAME(dst, simd_dw<vectorNum>(v0), simd_dw<vectorNum>(v1));\\\n"
"}\n"
"VEC_OP(simd_m<vectorNum>, simd_dw<vectorNum>, GE_S32, _mm_cmplt_epi32, SI2PS, PS2SI, PS2SI);\n"
"VEC_OP(simd_m<vectorNum>, simd_dw<vectorNum>, LE_S32, _mm_cmpgt_epi32, SI2PS, PS2SI, PS2SI);\n"
"template <uint32_t vectorNum>\\\n"
"INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const SRC_TYPE &v1) {\\\n"
" NAME(dst, simd_dw<vectorNum>(v0), v1);\\\n"
+"}\\\n"
+"template <uint32_t vectorNum>\\\n"
+"INLINE void NAME(DST_TYPE &dst, const scalar_dw &v0, const scalar_dw &v1) {\\\n"
+" NAME(dst, simd_dw<vectorNum>(v0), simd_dw<vectorNum>(v1));\\\n"
"}\n"
"VEC_OP(simd_dw<vectorNum>, simd_dw<vectorNum>, MUL_S32, *, s);\n"
"VEC_OP(simd_dw<vectorNum>, simd_dw<vectorNum>, DIV_S32, /, s);\n"
"template <uint32_t vectorNum>\n"
"INLINE void NE_S32(simd_m<vectorNum> &dst,\n"
" const simd_dw<vectorNum> &v0,\n"
-" const scalar_dw &v1)\n"
+" const simd_dw<vectorNum> &v1)\n"
"{\n"
-" NE_S32(dst, v0, simd_dw<vectorNum>(v1));\n"
+" for (uint32_t i = 0; i < vectorNum; ++i)\n"
+" dst.m[i] = _mm_xor_ps(alltrue.v, SI2PS(_mm_cmpeq_epi32(PS2SI(v0.m[i]), PS2SI(v1.m[i]))));\n"
"}\n"
"template <uint32_t vectorNum>\n"
"INLINE void NE_S32(simd_m<vectorNum> &dst,\n"
-" const scalar_dw &v0,\n"
-" const simd_dw<vectorNum> &v1)\n"
+" const simd_dw<vectorNum> &v0,\n"
+" const scalar_dw &v1)\n"
"{\n"
-" NE_S32(dst, simd_dw<vectorNum>(v0), v1);\n"
+" NE_S32(dst, v0, simd_dw<vectorNum>(v1));\n"
"}\n"
"template <uint32_t vectorNum>\n"
"INLINE void NE_S32(simd_m<vectorNum> &dst,\n"
-" const simd_dw<vectorNum> &v0,\n"
+" const scalar_dw &v0,\n"
" const simd_dw<vectorNum> &v1)\n"
"{\n"
-" for (uint32_t i = 0; i < vectorNum; ++i)\n"
-" dst.m[i] = _mm_xor_ps(alltrue.v, SI2PS(_mm_cmpeq_epi32(PS2SI(v0.m[i]), PS2SI(v1.m[i]))));\n"
+" NE_S32(dst, simd_dw<vectorNum>(v0), v1);\n"
"}\n"
"\n"
"/* Load from contiguous double words */\n"
"\n"
"/* Load immediates */\n"
"template <uint32_t vectorNum>\n"
-"INLINE void LOADI(simd_dw<vectorNum> &dst, float f) {\n"
+"INLINE void LOADI(simd_dw<vectorNum> &dst, uint32_t u) {\n"
+" union { uint32_t u; float f; } cast;\n"
+" cast.u = u;\n"
" for (uint32_t i = 0; i < vectorNum; ++i)\n"
-" dst.m[i] = _mm_load1_ps(&f);\n"
+" dst.m[i] = _mm_load1_ps(&cast.f);\n"
"}\n"
"\n"
+"#include <cstdio>\n"
"/* Scatter */\n"
"template <uint32_t vectorNum>\n"
-"INLINE void SCATTER(const simd_dw<vectorNum> &value,\n"
-" const simd_dw<vectorNum> &offset,\n"
+"INLINE void SCATTER(const simd_dw<vectorNum> &offset,\n"
+" const simd_dw<vectorNum> &value,\n"
" char *base_address) {\n"
" for (uint32_t i = 0; i < vectorNum; ++i) {\n"
" const int v0 = _mm_extract_epi32(PS2SI(value.m[i]), 0);\n"
" const int v1 = _mm_extract_epi32(PS2SI(value.m[i]), 1);\n"
" const int v2 = _mm_extract_epi32(PS2SI(value.m[i]), 2);\n"
" const int v3 = _mm_extract_epi32(PS2SI(value.m[i]), 3);\n"
-" const int o0 = _mm_extract_epi32(offset.m[i], 0);\n"
-" const int o1 = _mm_extract_epi32(offset.m[i], 1);\n"
-" const int o2 = _mm_extract_epi32(offset.m[i], 2);\n"
-" const int o3 = _mm_extract_epi32(offset.m[i], 3);\n"
+" const int o0 = _mm_extract_epi32(PS2SI(offset.m[i]), 0);\n"
+" const int o1 = _mm_extract_epi32(PS2SI(offset.m[i]), 1);\n"
+" const int o2 = _mm_extract_epi32(PS2SI(offset.m[i]), 2);\n"
+" const int o3 = _mm_extract_epi32(PS2SI(offset.m[i]), 3);\n"
" *(int*)(base_address + o0) = v0;\n"
" *(int*)(base_address + o1) = v1;\n"
" *(int*)(base_address + o2) = v2;\n"
" }\n"
"}\n"
"template <uint32_t vectorNum>\n"
-"INLINE void SCATTER(const scalar_dw &value,\n"
-" const simd_dw<vectorNum> &offset,\n"
+"INLINE void SCATTER(const simd_dw<vectorNum> &offset,\n"
+" const scalar_dw &value,\n"
" char *base_address) {\n"
" SCATTER(simd_dw<vectorNum>(value), offset, base_address);\n"
"}\n"
"template <uint32_t vectorNum>\n"
-"INLINE void SCATTER(const simd_dw<vectorNum> &value,\n"
-" const scalar_dw &offset,\n"
+"INLINE void SCATTER(const scalar_dw &offset,\n"
+" const simd_dw<vectorNum> &value,\n"
" char *base_address) {\n"
" SCATTER(value, simd_dw<vectorNum>(offset), base_address);\n"
"}\n"
void SimContext::emitRegisters(void) {
GBE_ASSERT(fn.getProfile() == ir::PROFILE_OCL);
const uint32_t regNum = fn.regNum();
+ bool lid0 = false, lid1 = false, lid2 = false; // for local id registers
for (uint32_t regID = 0; regID < regNum; ++regID) {
const ir::Register reg(regID);
if (reg == ir::ocl::groupid0 ||
reg == ir::ocl::groupid1 ||
reg == ir::ocl::groupid2)
continue;
+ if (reg == ir::ocl::lid0) lid0 = true;
+ if (reg == ir::ocl::lid1) lid1 = true;
+ if (reg == ir::ocl::lid2) lid2 = true;
const ir::RegisterData regData = fn.getRegisterData(reg);
switch (regData.family) {
case ir::FAMILY_BOOL:
break;
}
}
+
+ // Always declare local IDs
+ if (lid0 == false) o << "scalar_dw _" << uint32_t(ir::ocl::lid0) << ";\n";
+ if (lid1 == false) o << "scalar_dw _" << uint32_t(ir::ocl::lid1) << ";\n";
+ if (lid2 == false) o << "scalar_dw _" << uint32_t(ir::ocl::lid2) << ";\n";
}
- void SimContext::loadCurbe(void) {
+#define LOAD_SPECIAL_REG(CURBE, REG) do { \
+ const int32_t offset = kernel->getCurbeOffset(CURBE, 0); \
+ if (offset >= 0) \
+ o << "LOAD(_" << uint32_t(REG) << ", curbe + " << offset << ");\n"; \
+ } while (0)
+
+ void SimContext::emitCurbeLoad(void) {
// Right now curbe is only made of input argument stuff
const uint32_t inputNum = fn.inputNum();
for (uint32_t inputID = 0; inputID < inputNum; ++inputID) {
-
+ const ir::FunctionInput &input = fn.getInput(inputID);
+ const ir::Register reg = input.reg;
+ const int32_t offset = kernel->getCurbeOffset(GBE_CURBE_KERNEL_ARGUMENT, inputID);
+ // XXX add support for these items
+ GBE_ASSERT (input.type != ir::FunctionInput::VALUE &&
+ input.type != ir::FunctionInput::STRUCTURE &&
+ input.type != ir::FunctionInput::IMAGE &&
+ input.type != ir::FunctionInput::LOCAL_POINTER);
+ GBE_ASSERT(offset >= 0);
+ o << "LOAD(_" << uint32_t(reg) << ", curbe + " << offset << ");\n";
}
+
+ // We must now load the special registers needed by the kernel
+ LOAD_SPECIAL_REG(GBE_CURBE_LOCAL_ID_X, ir::ocl::lid0);
+ LOAD_SPECIAL_REG(GBE_CURBE_LOCAL_ID_Y, ir::ocl::lid1);
+ LOAD_SPECIAL_REG(GBE_CURBE_LOCAL_ID_Z, ir::ocl::lid2);
+ LOAD_SPECIAL_REG(GBE_CURBE_LOCAL_SIZE_X, ir::ocl::lsize0);
+ LOAD_SPECIAL_REG(GBE_CURBE_LOCAL_SIZE_Y, ir::ocl::lsize1);
+ LOAD_SPECIAL_REG(GBE_CURBE_LOCAL_SIZE_Z, ir::ocl::lsize2);
+ LOAD_SPECIAL_REG(GBE_CURBE_GLOBAL_SIZE_X, ir::ocl::gsize0);
+ LOAD_SPECIAL_REG(GBE_CURBE_GLOBAL_SIZE_Y, ir::ocl::gsize1);
+ LOAD_SPECIAL_REG(GBE_CURBE_GLOBAL_SIZE_Z, ir::ocl::gsize2);
+ LOAD_SPECIAL_REG(GBE_CURBE_GLOBAL_OFFSET_X, ir::ocl::goffset0);
+ LOAD_SPECIAL_REG(GBE_CURBE_GLOBAL_OFFSET_Y, ir::ocl::goffset1);
+ LOAD_SPECIAL_REG(GBE_CURBE_GLOBAL_OFFSET_Z, ir::ocl::goffset2);
+ LOAD_SPECIAL_REG(GBE_CURBE_GROUP_NUM_X, ir::ocl::numgroup0);
+ LOAD_SPECIAL_REG(GBE_CURBE_GROUP_NUM_Y, ir::ocl::numgroup1);
+ LOAD_SPECIAL_REG(GBE_CURBE_GROUP_NUM_Z, ir::ocl::numgroup2);
+ }
+
+ static const char *typeStr(const ir::Type &type) {
+ switch (type) {
+ case ir::TYPE_BOOL: return "M";
+ case ir::TYPE_S8: return "S8";
+ case ir::TYPE_S16: return "S16";
+ case ir::TYPE_S32: return "S32";
+ case ir::TYPE_S64: return "S64";
+ case ir::TYPE_U8: return "U8";
+ case ir::TYPE_U16: return "U16";
+ case ir::TYPE_U32: return "U32";
+ case ir::TYPE_U64: return "U64";
+ case ir::TYPE_HALF: return "HALF";
+ case ir::TYPE_FLOAT: return "F";
+ case ir::TYPE_DOUBLE: return "D";
+ default: NOT_IMPLEMENTED; return NULL;
+ };
+ }
+
+ void SimContext::emitInstructionStream(void) {
+ using namespace ir;
+ fn.foreachInstruction([&](const Instruction &insn) {
+ const char *opcodeStr = NULL;
+ const Opcode opcode = insn.getOpcode();
+#define DECL_INSN(OPCODE, FAMILY) \
+ case OP_##OPCODE: \
+ if (opcode == OP_LOAD) opcodeStr = "GATHER"; \
+ else if (opcode == OP_STORE) opcodeStr = "SCATTER"; \
+ else opcodeStr = #OPCODE; \
+ break;
+ switch (opcode) {
+ #include "ir/instruction.hxx"
+ default: NOT_IMPLEMENTED;
+#undef DECL_INSN
+ }
+ if (opcode == OP_LABEL) {
+ o << "label" << cast<LabelInstruction>(insn).getLabelIndex() << ":\n";
+ return;
+ } else if (opcode == OP_BRA) {
+ NOT_IMPLEMENTED;
+ return;
+ } else if (opcode == OP_RET) {
+ std::cout << "BE AWARE OF RET: ONLY ONE RET AT THE END OF THE FUNCTION SHOULD BE OUTPUTTED!";
+ o << "return;\n";
+ return;
+ }
+
+ // Extra checks
+#if GBE_DEBUG
+ if (opcode == OP_LOAD)
+ GBE_ASSERT(cast<LoadInstruction>(insn).getValueNum() == 1);
+ if (opcode == OP_STORE)
+ GBE_ASSERT(cast<StoreInstruction>(insn).getValueNum() == 1);
+#endif /* GBE_DEBUG */
+
+ // Regular compute instruction
+ const uint32_t dstNum = insn.getDstNum();
+ const uint32_t srcNum = insn.getSrcNum();
+ o << opcodeStr;
+
+ // Append type when needed
+ if (insn.isMemberOf<UnaryInstruction>() == true)
+ o << "_" << typeStr(cast<UnaryInstruction>(insn).getType());
+ else if (insn.isMemberOf<BinaryInstruction>() == true)
+ o << "_" << typeStr(cast<BinaryInstruction>(insn).getType());
+ else if (insn.isMemberOf<TernaryInstruction>() == true)
+ o << "_" << typeStr(cast<BinaryInstruction>(insn).getType());
+ else if (insn.isMemberOf<CompareInstruction>() == true)
+ o << "_" << typeStr(cast<CompareInstruction>(insn).getType());
+ o << "(";
+
+ // Output both destinations and sources in that order
+ for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {
+ o << "_" << uint32_t(insn.getDst(dstID));
+ if (dstID < dstNum-1 || srcNum > 0) o << ", ";
+ }
+ for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
+ o << "_" << uint32_t(insn.getSrc(srcID));
+ if (srcID < srcNum-1) o << ", ";
+ }
+
+ // Append extra stuff for instructions that need it
+ if (opcode == OP_LOADI) {
+ Immediate imm = cast<LoadImmInstruction>(insn).getImmediate();
+ GBE_ASSERT(imm.type == TYPE_S32 ||
+ imm.type == TYPE_U32 ||
+ imm.type == TYPE_FLOAT);
+ o << ", " << imm.data.u32;
+ } else if (opcode == OP_LOAD || opcode == OP_STORE)
+ o << ", base";
+ o << ");\n";
+ });
}
+#undef LOAD_SPECIAL_REG
+
void SimContext::emitCode(void) {
SimKernel *simKernel = static_cast<SimKernel*>(this->kernel);
char srcStr[L_tmpnam+1], libStr[L_tmpnam+1];
<< "(gbe_simulator sim, uint32_t tid, scalar_dw _3, scalar_dw _4, scalar_dw _5)\n"
<< "{\n"
<< "const size_t curbe_sz = sim->get_curbe_size(sim);\n"
- << "const char *curbe = (const char*) sim->get_curbe_address(sim) + curbe_sz * tid;\n";
+ << "const char *curbe = (const char*) sim->get_curbe_address(sim) + curbe_sz * tid;\n"
+ << "char *base = (char*) sim->get_base_address(sim);\n";
this->emitRegisters();
+ this->emitCurbeLoad();
+ this->emitInstructionStream();
o << "}\n";
o << std::endl;
o.close();
/* Compile the function */
std::cout << "# source: " << srcName << " library: " << libName << std::endl;
- std::string compileCmd = "g++ -funroll-loops -shared -msse -msse2 -msse3 -mssse3 -msse4.1 -g -O3 -o ";
+ //std::string compileCmd = "g++ -fPIC -funroll-loops -shared -msse -msse2 -msse3 -mssse3 -msse4.1 -g -O3 -o ";
+ std::string compileCmd = "g++ -fPIC -funroll-loops -shared -msse -msse2 -msse3 -mssse3 -msse4.1 -g -o ";
compileCmd += libName;
compileCmd += " ";
compileCmd += srcName;
/*! Emit all the register declarations */
void emitRegisters(void);
/*! Load the curbe data into the registers */
- void loadCurbe(void);
+ void emitCurbeLoad(void);
+ /*! Emit the instructions */
+ void emitInstructionStream(void);
/*! Implements base class */
virtual Kernel *allocateKernel(void);
std::ofstream o; //!< Where to output the c++ string
};
RegisterData Instruction::getDstData(uint32_t ID) const {
- GBE_ASSERT(this->getParent() != NULL);
- const Function &fn = this->getParent()->getParent();
+ const Function &fn = this->getFunction();
return fn.getRegisterData(this->getDst(ID));
}
RegisterData Instruction::getSrcData(uint32_t ID) const {
- GBE_ASSERT(this->getParent() != NULL);
- const Function &fn = this->getParent()->getParent();
+ const Function &fn = this->getFunction();
return fn.getRegisterData(this->getSrc(ID));
}
#define DECL_INSN(OPCODE, CLASS) \
case OP_##OPCODE: \
{ \
- GBE_ASSERT(this->getParent() != NULL); \
- const Function &fn = this->getParent()->getParent(); \
+ const Function &fn = this->getFunction(); \
return reinterpret_cast<const internal::CLASS*>(this)->CALL; \
}
#undef END_FUNCTION
#undef START_FUNCTION
+ const Function &Instruction::getFunction(void) const {
+ const BasicBlock *bb = this->getParent();
+ GBE_ASSERT(bb != NULL);
+ return bb->getParent();
+ }
+
#define DECL_MEM_FN(CLASS, RET, PROTOTYPE, CALL) \
RET CLASS::PROTOTYPE const { \
return reinterpret_cast<const internal::CLASS*>(this)->CALL; \
DECL_MEM_FN(LoadInstruction, Type, getValueType(void), getValueType())
DECL_MEM_FN(LoadInstruction, uint32_t, getValueNum(void), getValueNum())
DECL_MEM_FN(LoadInstruction, AddressSpace, getAddressSpace(void), getAddressSpace())
-DECL_MEM_FN(LoadImmInstruction, Immediate, getImmediate(const Function &fn), getImmediate(fn))
DECL_MEM_FN(LoadImmInstruction, Type, getType(void), getType())
DECL_MEM_FN(LabelInstruction, LabelIndex, getLabelIndex(void), getLabelIndex())
DECL_MEM_FN(BranchInstruction, bool, isPredicated(void), isPredicated())
#undef DECL_MEM_FN
+ Immediate LoadImmInstruction::getImmediate(void) const {
+ const Function &fn = this->getFunction();
+ return reinterpret_cast<const internal::LoadImmInstruction*>(this)->getImmediate(fn);
+ }
+
///////////////////////////////////////////////////////////////////////////
// Implements the emission functions
///////////////////////////////////////////////////////////////////////////
return insn.convert();
}
- std::ostream &operator<< (std::ostream &out, const Instruction &insn)
- {
- GBE_ASSERT(insn.getParent() != NULL);
- const BasicBlock *bb = insn.getParent();
- const Function &fn = bb->getParent();
+ std::ostream &operator<< (std::ostream &out, const Instruction &insn) {
+ const Function &fn = insn.getFunction();
switch (insn.getOpcode()) {
#define DECL_INSN(OPCODE, CLASS) \
case OP_##OPCODE: \
BasicBlock *getParent(void) { return parent; }
const BasicBlock *getParent(void) const { return parent; }
void setParent(BasicBlock *block) { this->parent = block; }
+ /*! Get the function from the parent basic block */
+ const Function &getFunction(void) const;
/*! Check that the instruction is well formed (type properly match,
* registers not of bound and so on). If not well formed, provide a reason
* in string why
class LoadImmInstruction : public Instruction {
public:
/*! Return the value stored in the instruction */
- Immediate getImmediate(const Function &fn) const;
+ Immediate getImmediate(void) const;
/*! Return the type of the stored value */
Type getType(void) const;
/*! Return true if the given instruction is an instance of this class */
--- /dev/null
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+/**
+ * \file debug.cpp
+ * \author Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+#include "debug.hpp"
+#include <cstdio>
+#include <sstream>
+#include <string>
+
+namespace gbe
+{
+#define DECL_DEBUG_VAR(TYPE, NAME) TYPE NAME;
+ #include "debug.hxx"
+#undef DECL_DEBUG_VAR
+} /* namespace gbe */
+
+namespace
+{
+ template <typename VarType>
+ static VarType getValue(const char *str) {
+ VarType value;
+ std::stringstream ss;
+ ss << std::string(str);
+ ss >> value;
+ return value;
+ }
+
+ struct DebugVarInitializer
+ {
+ DebugVarInitializer(void) {
+#define DECL_DEBUG_VAR(TYPE, NAME) gbe::NAME = TYPE(0);
+#include "debug.hxx"
+#undef DECL_DEBUG_VAR
+
+#define DECL_DEBUG_VAR(TYPE, NAME) do { \
+ const char *str = getenv(#NAME); \
+ if (str != NULL) gbe::NAME = getValue<TYPE>(str); \
+} while (0);
+#include "debug.hxx"
+#undef DECL_DEBUG_VAR
+ }
+ };
+
+ static DebugVarInitializer debugVarInitializer;
+} /* namespace */
+
--- /dev/null
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+/**
+ * \file debug.hpp
+ * \author Benjamin Segovia <benjamin.segovia@intel.com>
+ *
+ * shitloads of debug variables (set on pre-main) that the user can set
+ */
+
+namespace gbe
+{
+#define DECL_DEBUG_VAR(TYPE, NAME) extern TYPE NAME;
+ #include "debug.hxx"
+#undef DECL_DEBUG_VAR
+} /* namespace gbe */
+
--- /dev/null
+DECL_DEBUG_VAR(bool, OCL_OUTPUT_GEN_IR)
+DECL_DEBUG_VAR(bool, OCL_OUTPUT_LLVM)
+