add_subdirectory(llvm)
-set (TO_STRINGIFY_FILES simulator sim_vector)
+macro (stringify TO_STRINGIFY_PATH TO_STRINGIFY_FILES)
foreach (to_stringify_file ${TO_STRINGIFY_FILES})
- set (input_file ${GBE_SOURCE_DIR}/src/backend/sim/${to_stringify_file}.h)
- set (output_file ${GBE_SOURCE_DIR}/src/backend/sim/${to_stringify_file}_str.cpp)
+ set (input_file ${TO_STRINGIFY_PATH}/${to_stringify_file}.h)
+ set (output_file ${TO_STRINGIFY_PATH}/${to_stringify_file}_str.cpp)
set (string_header "\\\"string\\\"")
add_custom_command(
OUTPUT ${output_file}
COMMAND echo "" >> ${output_file}
MAIN_DEPENDENCY ${input_file})
endforeach (to_stringify_file)
+endmacro (stringify)
+
+set (TO_STRINGIFY_FILES simulator sim_vector)
+stringify ("${GBE_SOURCE_DIR}/src/backend/sim/" "${TO_STRINGIFY_FILES}")
+set (TO_STRINGIFY_FILES stdlib)
+stringify ("${GBE_SOURCE_DIR}/src/llvm/" "${TO_STRINGIFY_FILES}")
if (GBE_USE_BLOB)
set (GBE_SRC blob.cpp)
ir/function.hpp
ir/value.cpp
ir/value.hpp
+ llvm/stdlib_str.cpp
backend/context.cpp
backend/context.hpp
backend/program.cpp
#include "ir/profile.hpp"
#include "ir/liveness.hpp"
#include "ir/value.hpp"
+#include "sys/cvar.hpp"
#include <algorithm>
namespace gbe
{
+
+ IVAR(OCL_SIMD_WIDTH, 8, 16, 32);
+
Context::Context(const ir::Unit &unit, const std::string &name) :
unit(unit), fn(*unit.getFunction(name)), name(name), liveness(NULL), dag(NULL)
{
GBE_ASSERT(unit.getPointerSize() == ir::POINTER_32_BITS);
this->liveness = GBE_NEW(ir::Liveness, (ir::Function&) fn);
this->dag = GBE_NEW(ir::FunctionDAG, *this->liveness);
- this->simdWidth = 16; /* XXX environment variable for that to start with */
+ this->simdWidth = nextHighestPowerOf2(OCL_SIMD_WIDTH);
}
Context::~Context(void) {
GBE_SAFE_DELETE(this->dag);
this->kernel = this->allocateKernel();
this->buildPatchList();
this->buildArgList();
+ this->buildUsedLabels();
this->emitCode();
return this->kernel;
}
}
}
+ void Context::buildUsedLabels(void) {
+ usedLabels.clear();
+ fn.foreachInstruction([this](const ir::Instruction &insn) {
+ using namespace ir;
+ if (insn.getOpcode() != OP_BRA) return;
+ const LabelIndex index = cast<BranchInstruction>(insn).getLabelIndex();
+ usedLabels.insert(index);
+ });
+ }
+
bool Context::isScalarReg(const ir::Register ®) const {
GBE_ASSERT(fn.getProfile() == ir::Profile::PROFILE_OCL);
if (fn.getInput(reg) != NULL)
#define __GBE_CONTEXT_HPP__
#include "sys/platform.hpp"
+#include "sys/set.hpp"
+#include "ir/instruction.hpp"
#include <string>
namespace gbe {
~Context(void);
/*! Compile the code */
Kernel *compileKernel(void);
+ /*! Tells if the labels is used */
+ INLINE bool isLabelUsed(ir::LabelIndex index) const {
+ return usedLabels.contains(index);
+ }
+ /*! Tells if the register is used */
+ bool isRegUsed(const ir::Register ®) const;
protected:
/*! Build the curbe patch list for the given kernel */
void buildPatchList(void);
/*! Build the list of arguments to set to launch the kernel */
void buildArgList(void);
+ /*! Build the sets of used labels */
+ void buildUsedLabels(void);
/*! Indicate if a register is scalar or not */
bool isScalarReg(const ir::Register ®) const;
/*! Build the instruction stream */
virtual void emitCode(void) = 0;
/*! Allocate a new empty kernel */
virtual Kernel *allocateKernel(void) = 0;
- const ir::Unit &unit; //!< Unit that contains the kernel
- const ir::Function &fn; //!< Function to compile
- std::string name; //!< Name of the kernel to compile
- Kernel *kernel; //!< Kernel we are building
- ir::Liveness *liveness; //!< Liveness info for the variables
- ir::FunctionDAG *dag; //!< Complete DAG of values on the function
- uint32_t simdWidth; //!< Number of lanes per HW threads
+ const ir::Unit &unit; //!< Unit that contains the kernel
+ const ir::Function &fn; //!< Function to compile
+ std::string name; //!< Name of the kernel to compile
+ Kernel *kernel; //!< Kernel we are building
+ ir::Liveness *liveness; //!< Liveness info for the variables
+ ir::FunctionDAG *dag; //!< Complete DAG of values on the function
+ set<ir::LabelIndex> usedLabels; //!< Set of all labels actually used
+ uint32_t simdWidth; //!< Number of lanes per HW threads
};
} /* namespace gbe */
return ker;
}
- static gbe_program genProgramNewFromSource(const char *source) {
- NOT_IMPLEMENTED;
- return NULL;
- }
-
static gbe_program genProgramNewFromBinary(const char *binary, size_t size) {
NOT_IMPLEMENTED;
return NULL;
using namespace gbe;
GenProgram *program = GBE_NEW(GenProgram);
std::string error;
-
// Try to compile the program
if (program->buildFromLLVMFile(fileName, error) == false) {
if (err != NULL && errSize != NULL && stringSize > 0u) {
void genSetupCallBacks(void)
{
- gbe_program_new_from_source = gbe::genProgramNewFromSource;
gbe_program_new_from_binary = gbe::genProgramNewFromBinary;
gbe_program_new_from_llvm = gbe::genProgramNewFromLLVM;
}
}
BVAR(OCL_OUTPUT_GEN_IR, false);
- BVAR(OCL_OUTPUT_LLVM, false);
bool Program::buildFromLLVMFile(const char *fileName, std::string &error) {
ir::Unit unit;
- if (OCL_OUTPUT_LLVM) {
- std::ifstream llvmFile;
- llvmFile.open(fileName);
- if (llvmFile.is_open() == true) {
- std::string line;
- while (llvmFile.good() == true) {
- std::getline(llvmFile ,line);
- std::cout << line << std::endl;
- }
- }
- }
if (llvmToGen(unit, fileName) == false) {
error = std::string(fileName) + " not found";
return false;
GBE_SAFE_DELETE(program);
}
+ extern std::string stdlib_str;
+ static gbe_program programNewFromSource(const char *source,
+ size_t stringSize,
+ char *err,
+ size_t *errSize)
+ {
+ char clStr[L_tmpnam+1], llStr[L_tmpnam+1];
+ const std::string clName = std::string(tmpnam_r(clStr)) + ".cl"; /* unsafe! */
+ const std::string llName = std::string(tmpnam_r(llStr)) + ".ll"; /* unsafe! */
+
+ // Write the source to the cl file
+ FILE *clFile = fopen(clName.c_str(), "w");
+ FATAL_IF(clFile == NULL, "Failed to open temporary file");
+ fwrite(stdlib_str.c_str(), strlen(stdlib_str.c_str()), 1, clFile);
+ fwrite(source, strlen(source), 1, clFile);
+ fclose(clFile);
+
+ // Now compile the code to llvm using clang
+ // XXX use popen and stuff instead of that
+ std::string compileCmd = "clang -emit-llvm -O3 -ccc-host-triple ptx32 -c ";
+ compileCmd += clName;
+ compileCmd += " -o ";
+ compileCmd += llName;
+ if (UNLIKELY(system(compileCmd.c_str()) != 0)) return NULL;
+
+ // Now build the program from llvm
+ return gbe_program_new_from_llvm(llName.c_str(), stringSize, err, errSize);
+ }
+
static uint32_t programGetKernelNum(gbe_program gbeProgram) {
if (gbeProgram == NULL) return 0;
const gbe::Program *program = (const gbe::Program*) gbeProgram;
struct CallBackInitializer
{
CallBackInitializer(void) {
+ gbe_program_new_from_source = gbe::programNewFromSource;
gbe_program_delete = gbe::programDelete;
gbe_program_get_kernel_num = gbe::programGetKernelNum;
gbe_program_get_kernel_by_name = gbe::programGetKernelByName;
};
/*! Create a new program from the given source code (zero terminated string) */
-typedef gbe_program (gbe_program_new_from_source_cb)(const char *source);
+typedef gbe_program (gbe_program_new_from_source_cb)(const char *source,
+ size_t stringSize,
+ char *err,
+ size_t *err_size);
extern gbe_program_new_from_source_cb *gbe_program_new_from_source;
/*! Create a new program from the given blob */
INLINE void SCATTER(const simd_dw<vectorNum> &offset,
const scalar_dw &value,
char *base_address) {
- SCATTER(simd_dw<vectorNum>(value), offset, base_address);
+ SCATTER(offset, simd_dw<vectorNum>(value), base_address);
}
template <uint32_t vectorNum>
INLINE void SCATTER(const scalar_dw &offset,
const simd_dw<vectorNum> &value,
char *base_address) {
- SCATTER(value, simd_dw<vectorNum>(offset), base_address);
+ SCATTER(simd_dw<vectorNum>(offset), value, base_address);
}
#include <cstdio>
/* Gather */
INLINE void LOAD(scalar_dw &dst, const char *ptr) { dst.u = *(const uint32_t *) ptr; }
INLINE void STORE(scalar_dw src, char *ptr) { *(uint32_t *) ptr = src.u; }
INLINE void LOADI(scalar_dw &dst, uint32_t u) { dst.u = u; }
-INLINE void SCATTER(scalar_dw value, scalar_dw offset, char *base) { *(uint32_t*)(base + offset.u) = value.u; }
+INLINE void SCATTER(scalar_dw offset, scalar_dw value, char *base) { *(uint32_t*)(base + offset.u) = value.u; }
INLINE void GATHER(scalar_dw &dst, scalar_dw offset, const char *base) { dst.u = *(const uint32_t*)(base + offset.u); }
//////////////////////////////////////////////////////////////////////////////
"INLINE void SCATTER(const simd_dw<vectorNum> &offset,\n"
" const scalar_dw &value,\n"
" char *base_address) {\n"
-" SCATTER(simd_dw<vectorNum>(value), offset, base_address);\n"
+" SCATTER(offset, simd_dw<vectorNum>(value), base_address);\n"
"}\n"
"template <uint32_t vectorNum>\n"
"INLINE void SCATTER(const scalar_dw &offset,\n"
" const simd_dw<vectorNum> &value,\n"
" char *base_address) {\n"
-" SCATTER(value, simd_dw<vectorNum>(offset), base_address);\n"
+" SCATTER(simd_dw<vectorNum>(offset), value, base_address);\n"
"}\n"
"#include <cstdio>\n"
"/* Gather */\n"
"INLINE void LOAD(scalar_dw &dst, const char *ptr) { dst.u = *(const uint32_t *) ptr; }\n"
"INLINE void STORE(scalar_dw src, char *ptr) { *(uint32_t *) ptr = src.u; }\n"
"INLINE void LOADI(scalar_dw &dst, uint32_t u) { dst.u = u; }\n"
-"INLINE void SCATTER(scalar_dw value, scalar_dw offset, char *base) { *(uint32_t*)(base + offset.u) = value.u; }\n"
+"INLINE void SCATTER(scalar_dw offset, scalar_dw value, char *base) { *(uint32_t*)(base + offset.u) = value.u; }\n"
"INLINE void GATHER(scalar_dw &dst, scalar_dw offset, const char *base) { dst.u = *(const uint32_t*)(base + offset.u); }\n"
"\n"
"//////////////////////////////////////////////////////////////////////////////\n"
void SimContext::emitRegisters(void) {
GBE_ASSERT(fn.getProfile() == ir::PROFILE_OCL);
+
+ // First we build the set of all used registers
+ set<ir::Register> usedRegs;
+ fn.foreachInstruction([&usedRegs](const ir::Instruction &insn) {
+ const uint32_t srcNum = insn.getSrcNum(), dstNum = insn.getDstNum();
+ for (uint32_t srcID = 0; srcID < srcNum; ++srcID)
+ usedRegs.insert(insn.getSrc(srcID));
+ for (uint32_t dstID = 0; dstID < dstNum; ++dstID)
+ usedRegs.insert(insn.getDst(dstID));
+ });
+
const uint32_t regNum = fn.regNum();
bool lid0 = false, lid1 = false, lid2 = false; // for local id registers
for (uint32_t regID = 0; regID < regNum; ++regID) {
const ir::Register reg(regID);
+ if (usedRegs.contains(reg) == false) continue;
if (reg == ir::ocl::groupid0 ||
reg == ir::ocl::groupid1 ||
reg == ir::ocl::groupid2)
#undef DECL_INSN
}
if (opcode == OP_LABEL) {
- o << "label" << cast<LabelInstruction>(insn).getLabelIndex() << ":\n";
+ const LabelInstruction labelInsn = cast<LabelInstruction>(insn);
+ const LabelIndex index = labelInsn.getLabelIndex();
+ if (usedLabels.contains(index) == true)
+ o << "label" << index << ":\n";
return;
} else if (opcode == OP_BRA) {
NOT_IMPLEMENTED;
return;
} else if (opcode == OP_RET) {
- std::cout << "BE AWARE OF RET: ONLY ONE RET AT THE END OF THE FUNCTION SHOULD BE OUTPUTTED!";
o << "return;\n";
return;
}
return ker;
}
- static gbe_program simProgramNewFromSource(const char *source) {
- NOT_IMPLEMENTED;
- return NULL;
- }
-
static gbe_program simProgramNewFromBinary(const char *binary, size_t size) {
NOT_IMPLEMENTED;
return NULL;
using namespace gbe;
SimProgram *program = GBE_NEW(SimProgram);
std::string error;
- /* Try to compile the program */
+ // Try to compile the program
if (program->buildFromLLVMFile(fileName, error) == false) {
if (err != NULL && errSize != NULL && stringSize > 0u) {
const size_t msgSize = std::min(error.size(), stringSize-1u);
GBE_DELETE(program);
return NULL;
}
- /* Everything run fine */
+ // Everything run fine
return (gbe_program) program;
}
void simSetupCallBacks(void)
{
- gbe_program_new_from_source = gbe::simProgramNewFromSource;
gbe_program_new_from_binary = gbe::simProgramNewFromBinary;
gbe_program_new_from_llvm = gbe::simProgramNewFromLLVM;
}
// Check first that all branch instructions point to valid labels
for (auto it = usedLabels->begin(); it != usedLabels->end(); ++it)
GBE_ASSERTM(*it != LABEL_IS_POINTED, "A label is used and not defined");
+ fn->sortLabels();
fn->computeCFG();
GBE_DELETE(usedLabels);
const StackElem elem = fnStack.back();
*/
#include "ir/function.hpp"
#include "sys/string.hpp"
+#include "sys/map.hpp"
namespace gbe {
namespace ir {
GBE_DELETE(*it);
}
+ void Function::sortLabels(void) {
+ uint32_t last = 0;
+
+ // Compute the new labels and patch the label instruction
+ map<LabelIndex, LabelIndex> labelMap;
+ foreachInstruction([&](Instruction &insn) {
+ if (insn.getOpcode() != OP_LABEL) return;
+
+ // Create the new label
+ Instruction *newLabel = newInstruction();
+ *newLabel = LABEL(LabelIndex(last));
+
+ // Replace the previous label instruction
+ LabelInstruction &label = cast<LabelInstruction>(insn);
+ const LabelIndex index = label.getLabelIndex();
+ labelMap.insert(std::make_pair(index, LabelIndex(last++)));
+ newLabel->replace(&insn);
+ });
+
+ // Patch all branch instructions with the new labels
+ foreachInstruction([&](Instruction &insn) {
+ if (insn.getOpcode() != OP_BRA) return;
+
+ // Get the current branch instruction
+ BranchInstruction &bra = cast<BranchInstruction>(insn);
+ const LabelIndex index = bra.getLabelIndex();
+ const LabelIndex newIndex = labelMap.find(index)->second;
+
+ // Insert the patched branch instruction
+ Instruction *newBra = newInstruction();
+ if (bra.isPredicated() == true)
+ *newBra = BRA(newIndex, bra.getPredicateIndex());
+ else
+ *newBra = BRA(newIndex);
+ newBra->replace(&insn);
+ });
+ }
+
LabelIndex Function::newLabel(void) {
GBE_ASSERTM(labels.size() < 0xffff,
"Too many labels are defined (65536 only are supported)");
INLINE void foreach(const T &functor) const {
Instruction *curr = first;
while (curr) {
+ // Be aware the current instruction can be destroyed in functor
+ Instruction *succ = curr->getSuccessor();
functor(*curr);
- curr = curr->getSuccessor();
+ curr = succ;
}
}
/*! Apply the given functor on all instructions (reverse order) */
INLINE void rforeach(const T &functor) const {
Instruction *curr = last;
while (curr) {
+ // Be aware the current instruction can be destroyed in functor
+ Instruction *pred = curr->getPredecessor();
functor(*curr);
- curr = curr->getPredecessor();
+ curr = pred;
}
}
/*! Get the parent function */
/*! Get the next and previous allocated block */
BasicBlock *getNextBlock(void) const { return this->nextBlock; }
BasicBlock *getPrevBlock(void) const { return this->prevBlock; }
- /*! Get the first and last instructions */
+ /*! Get / set the first and last instructions */
Instruction *getFirstInstruction(void) const { return this->first; }
Instruction *getLastInstruction(void) const { return this->last; }
+ void setFirstInstruction(Instruction *insn) { this->first = insn; }
+ void setLastInstruction(Instruction *insn) { this->last = insn; }
/*! Get successors and predecessors */
const BlockSet &getSuccessorSet(void) const { return successors; }
const BlockSet &getPredecessorSet(void) const { return predecessors; }
LabelIndex newLabel(void);
/*! Create the control flow graph */
void computeCFG(void);
+ /*! Sort the labels in increasing orders (ie top block has the smallest
+ * labels)
+ */
+ void sortLabels(void);
/*! Number of registers in the register file */
INLINE uint32_t regNum(void) const { return file.regNum(); }
/*! Number of register tuples in the register file */
GBE_ASSERT(bb != NULL);
return bb->getParent();
}
+ Function &Instruction::getFunction(void) {
+ BasicBlock *bb = this->getParent();
+ GBE_ASSERT(bb != NULL);
+ return bb->getParent();
+ }
+
+ void Instruction::replace(Instruction *other) {
+ Function &fn = other->getFunction();
+ BasicBlock *bb = other->getParent();
+ if (bb->getFirstInstruction() == other) bb->setFirstInstruction(this);
+ if (bb->getLastInstruction() == other) bb->setLastInstruction(this);
+ if (other->predecessor) other->predecessor->successor = this;
+ if (other->successor) other->successor->predecessor = this;
+ this->parent = other->parent;
+ this->predecessor = other->predecessor;
+ this->successor = other->successor;
+ fn.deleteInstruction(other);
+ }
#define DECL_MEM_FN(CLASS, RET, PROTOTYPE, CALL) \
RET CLASS::PROTOTYPE const { \
void setParent(BasicBlock *block) { this->parent = block; }
/*! Get the function from the parent basic block */
const Function &getFunction(void) const;
+ Function &getFunction(void);
/*! Check that the instruction is well formed (type properly match,
* registers not of bound and so on). If not well formed, provide a reason
* in string why
*/
bool wellFormed(const Function &fn, std::string &why) const;
+ /*! Replace other by this instruction */
+ void replace(Instruction *other);
/*! Indicates if the instruction belongs to instruction type T. Typically, T
* can be BinaryInstruction, UnaryInstruction, LoadInstruction and so on
*/
const ValueDef *getDefAddress(const Register ®) const;
/*! Get the pointer to the use *as stored in the DAG* */
const ValueUse *getUseAddress(const Instruction *insn, uint32_t srcID) const;
+ /*! Get the set of all uses for the register */
+ const UseSet *getRegUse(const Register ®) const;
+ /*! Get the set of all definitions for the register */
+ const DefSet *getRegDef(const Register ®) const;
/*! Get the function we have the graph for */
- const Function &getFunction(void) const { return fn; }
+ INLINE const Function &getFunction(void) const { return fn; }
/*! The DefSet for each definition use */
typedef map<ValueUse, DefSet*> UDGraph;
/*! The UseSet for each definition */
#include "llvm/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/IRReader.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/llvm_gen_backend.hpp"
#include "llvm/llvm_to_gen.hpp"
+#include "sys/cvar.hpp"
#include "sys/platform.hpp"
namespace gbe
{
+ BVAR(OCL_OUTPUT_LLVM, false);
+ BVAR(OCL_OUTPUT_LLVM_BEFORE_EXTRA_PASS, false);
+
bool llvmToGen(ir::Unit &unit, const char *fileName)
{
using namespace llvm;
// Get the global LLVM context
llvm::LLVMContext& c = llvm::getGlobalContext();
+ std::string errInfo;
+ llvm::raw_fd_ostream o("-", errInfo);
// Get the module from its file
SMDiagnostic Err;
Module &mod = *M.get();
llvm::PassManager passes;
+
+ // Print the code before further optimizations
+ if (OCL_OUTPUT_LLVM_BEFORE_EXTRA_PASS)
+ passes.add(createPrintModulePass(&o));
passes.add(createScalarReplAggregatesPass()); // Break up allocas
passes.add(createRemoveGEPPass(unit));
passes.add(createConstantPropagationPass());
- passes.add(createDeadInstEliminationPass()); // remove simplified instructions
+ passes.add(createDeadInstEliminationPass()); // Remove simplified instructions
passes.add(createLowerSwitchPass());
passes.add(createPromoteMemoryToRegisterPass());
passes.add(createGVNPass()); // Remove redundancies
passes.add(createGenPass(unit));
+
+ // Print the code extra optimization passes
+ if (OCL_OUTPUT_LLVM)
+ passes.add(createPrintModulePass(&o));
passes.run(mod);
return true;
}
--- /dev/null
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+#define DECL_INTERNAL_WORK_ITEM_FN(NAME) \
+__attribute__((pure,const)) unsigned int __gen_ocl_##NAME##0(void); \
+__attribute__((pure,const)) unsigned int __gen_ocl_##NAME##1(void); \
+__attribute__((pure,const)) unsigned int __gen_ocl_##NAME##2(void);
+DECL_INTERNAL_WORK_ITEM_FN(get_group_id)
+DECL_INTERNAL_WORK_ITEM_FN(get_local_id)
+DECL_INTERNAL_WORK_ITEM_FN(get_local_size)
+DECL_INTERNAL_WORK_ITEM_FN(get_global_size)
+DECL_INTERNAL_WORK_ITEM_FN(get_num_groups)
+#undef DECL_INTERNAL_WORK_ITEM_FN
+
+#define DECL_PUBLIC_WORK_ITEM_FN(NAME) \
+inline unsigned NAME(unsigned int dim) { \
+ if (dim == 0) return __gen_ocl_##NAME##0(); \
+ else if (dim == 1) return __gen_ocl_##NAME##1(); \
+ else if (dim == 2) return __gen_ocl_##NAME##2(); \
+ else return 0; \
+}
+DECL_PUBLIC_WORK_ITEM_FN(get_group_id)
+DECL_PUBLIC_WORK_ITEM_FN(get_local_id)
+DECL_PUBLIC_WORK_ITEM_FN(get_local_size)
+DECL_PUBLIC_WORK_ITEM_FN(get_global_size)
+DECL_PUBLIC_WORK_ITEM_FN(get_num_groups)
+#undef DECL_PUBLIC_WORK_ITEM_FN
+
+inline unsigned int get_global_id(unsigned int dim) {
+ return get_local_id(dim) + get_local_size(dim) * get_group_id(dim);
+}
+
+__attribute__ ((pure,const,overloadable)) float mad(float a, float b, float c);
+__attribute__((overloadable)) inline unsigned select(unsigned src0, unsigned src1, unsigned cond) {
+ return cond ? src0 : src1;
+}
+__attribute__((overloadable)) inline int select(int src0, int src1, int cond) {
+ return cond ? src0 : src1;
+}
+
+typedef float float2 __attribute__((ext_vector_type(2)));
+typedef float float3 __attribute__((ext_vector_type(3)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef int int2 __attribute__((ext_vector_type(2)));
+typedef int int3 __attribute__((ext_vector_type(3)));
+typedef int int4 __attribute__((ext_vector_type(4)));
+typedef int uint2 __attribute__((ext_vector_type(2)));
+typedef unsigned uint3 __attribute__((ext_vector_type(3)));
+typedef unsigned uint4 __attribute__((ext_vector_type(4)));
+typedef bool bool2 __attribute__((ext_vector_type(2)));
+typedef bool bool3 __attribute__((ext_vector_type(3)));
+typedef bool bool4 __attribute__((ext_vector_type(4)));
+
+// This will be optimized out by LLVM and will output LLVM select instructions
+#define DECL_SELECT4(TYPE4, TYPE, COND_TYPE4, MASK) \
+__attribute__((overloadable)) \
+inline TYPE4 select(TYPE4 src0, TYPE4 src1, COND_TYPE4 cond) { \
+ TYPE4 dst; \
+ const TYPE x0 = src0.x; /* Fix performance issue with CLANG */ \
+ const TYPE x1 = src1.x; \
+ const TYPE y0 = src0.y; \
+ const TYPE y1 = src1.y; \
+ const TYPE z0 = src0.z; \
+ const TYPE z1 = src1.z; \
+ const TYPE w0 = src0.w; \
+ const TYPE w1 = src1.w; \
+ \
+ dst.x = (cond.x & MASK) ? x1 : x0; \
+ dst.y = (cond.y & MASK) ? y1 : y0; \
+ dst.z = (cond.z & MASK) ? z1 : z0; \
+ dst.w = (cond.w & MASK) ? w1 : w0; \
+ return dst; \
+}
+DECL_SELECT4(int4, int, int4, 0x80000000)
+DECL_SELECT4(float4, float, int4, 0x80000000)
+#undef DECL_SELECT4
+
+__attribute__((overloadable,always_inline)) inline float2 mad(float2 a, float2 b, float2 c) {
+ return (float2)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y));
+}
+__attribute__((overloadable,always_inline)) inline float3 mad(float3 a, float3 b, float3 c) {
+ return (float3)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y), mad(a.z,b.z,c.z));
+}
+__attribute__((overloadable,always_inline)) inline float4 mad(float4 a, float4 b, float4 c) {
+ return (float4)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y),
+ mad(a.z,b.z,c.z), mad(a.w,b.w,c.w));
+}
+
+#define __private __attribute__((address_space(0)))
+#define __global __attribute__((address_space(1)))
+#define __constant __attribute__((address_space(2)))
+//#define __local __attribute__((address_space(3)))
+#define global __global
+//#define local __local
+#define constant __constant
+#define private __private
+
+#define NULL ((void*)0)
--- /dev/null
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+#include "string"
+namespace gbe {
+std::string stdlib_str =
+"#define DECL_INTERNAL_WORK_ITEM_FN(NAME) \\\n"
+"__attribute__((pure,const)) unsigned int __gen_ocl_##NAME##0(void); \\\n"
+"__attribute__((pure,const)) unsigned int __gen_ocl_##NAME##1(void); \\\n"
+"__attribute__((pure,const)) unsigned int __gen_ocl_##NAME##2(void);\n"
+"DECL_INTERNAL_WORK_ITEM_FN(get_group_id)\n"
+"DECL_INTERNAL_WORK_ITEM_FN(get_local_id)\n"
+"DECL_INTERNAL_WORK_ITEM_FN(get_local_size)\n"
+"DECL_INTERNAL_WORK_ITEM_FN(get_global_size)\n"
+"DECL_INTERNAL_WORK_ITEM_FN(get_num_groups)\n"
+"#undef DECL_INTERNAL_WORK_ITEM_FN\n"
+"\n"
+"#define DECL_PUBLIC_WORK_ITEM_FN(NAME) \\\n"
+"inline unsigned NAME(unsigned int dim) { \\\n"
+" if (dim == 0) return __gen_ocl_##NAME##0(); \\\n"
+" else if (dim == 1) return __gen_ocl_##NAME##1(); \\\n"
+" else if (dim == 2) return __gen_ocl_##NAME##2(); \\\n"
+" else return 0; \\\n"
+"}\n"
+"DECL_PUBLIC_WORK_ITEM_FN(get_group_id)\n"
+"DECL_PUBLIC_WORK_ITEM_FN(get_local_id)\n"
+"DECL_PUBLIC_WORK_ITEM_FN(get_local_size)\n"
+"DECL_PUBLIC_WORK_ITEM_FN(get_global_size)\n"
+"DECL_PUBLIC_WORK_ITEM_FN(get_num_groups)\n"
+"#undef DECL_PUBLIC_WORK_ITEM_FN\n"
+"\n"
+"inline unsigned int get_global_id(unsigned int dim) {\n"
+" return get_local_id(dim) + get_local_size(dim) * get_group_id(dim);\n"
+"}\n"
+"\n"
+"__attribute__ ((pure,const,overloadable)) float mad(float a, float b, float c);\n"
+"__attribute__((overloadable)) inline unsigned select(unsigned src0, unsigned src1, unsigned cond) {\n"
+" return cond ? src0 : src1;\n"
+"}\n"
+"__attribute__((overloadable)) inline int select(int src0, int src1, int cond) {\n"
+" return cond ? src0 : src1;\n"
+"}\n"
+"\n"
+"typedef float float2 __attribute__((ext_vector_type(2)));\n"
+"typedef float float3 __attribute__((ext_vector_type(3)));\n"
+"typedef float float4 __attribute__((ext_vector_type(4)));\n"
+"typedef int int2 __attribute__((ext_vector_type(2)));\n"
+"typedef int int3 __attribute__((ext_vector_type(3)));\n"
+"typedef int int4 __attribute__((ext_vector_type(4)));\n"
+"typedef int uint2 __attribute__((ext_vector_type(2)));\n"
+"typedef unsigned uint3 __attribute__((ext_vector_type(3)));\n"
+"typedef unsigned uint4 __attribute__((ext_vector_type(4)));\n"
+"typedef bool bool2 __attribute__((ext_vector_type(2)));\n"
+"typedef bool bool3 __attribute__((ext_vector_type(3)));\n"
+"typedef bool bool4 __attribute__((ext_vector_type(4)));\n"
+"\n"
+"// This will be optimized out by LLVM and will output LLVM select instructions\n"
+"#define DECL_SELECT4(TYPE4, TYPE, COND_TYPE4, MASK) \\\n"
+"__attribute__((overloadable)) \\\n"
+"inline TYPE4 select(TYPE4 src0, TYPE4 src1, COND_TYPE4 cond) { \\\n"
+" TYPE4 dst; \\\n"
+" const TYPE x0 = src0.x; /* Fix performance issue with CLANG */ \\\n"
+" const TYPE x1 = src1.x; \\\n"
+" const TYPE y0 = src0.y; \\\n"
+" const TYPE y1 = src1.y; \\\n"
+" const TYPE z0 = src0.z; \\\n"
+" const TYPE z1 = src1.z; \\\n"
+" const TYPE w0 = src0.w; \\\n"
+" const TYPE w1 = src1.w; \\\n"
+" \\\n"
+" dst.x = (cond.x & MASK) ? x1 : x0; \\\n"
+" dst.y = (cond.y & MASK) ? y1 : y0; \\\n"
+" dst.z = (cond.z & MASK) ? z1 : z0; \\\n"
+" dst.w = (cond.w & MASK) ? w1 : w0; \\\n"
+" return dst; \\\n"
+"}\n"
+"DECL_SELECT4(int4, int, int4, 0x80000000)\n"
+"DECL_SELECT4(float4, float, int4, 0x80000000)\n"
+"#undef DECL_SELECT4\n"
+"\n"
+"__attribute__((overloadable,always_inline)) inline float2 mad(float2 a, float2 b, float2 c) {\n"
+" return (float2)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y));\n"
+"}\n"
+"__attribute__((overloadable,always_inline)) inline float3 mad(float3 a, float3 b, float3 c) {\n"
+" return (float3)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y), mad(a.z,b.z,c.z));\n"
+"}\n"
+"__attribute__((overloadable,always_inline)) inline float4 mad(float4 a, float4 b, float4 c) {\n"
+" return (float4)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y),\n"
+" mad(a.z,b.z,c.z), mad(a.w,b.w,c.w));\n"
+"}\n"
+"\n"
+"#define __private __attribute__((address_space(0)))\n"
+"#define __global __attribute__((address_space(1)))\n"
+"#define __constant __attribute__((address_space(2)))\n"
+"//#define __local __attribute__((address_space(3)))\n"
+"#define global __global\n"
+"//#define local __local\n"
+"#define constant __constant\n"
+"#define private __private\n"
+"\n"
+"#define NULL ((void*)0)\n"
+;
+}
+
+ std::string(file)
+ ", function " + std::string(fn)
+ ", line " + std::string(lineString);
- // assert(0);
+ assert(0);
throw Exception(str);
}
} /* namespace gbe */
LOAD(_##INDEX##g, (const char *) (gatherOffsets+index##INDEX));\
LOAD(_##INDEX##s, (const char *) (scatterOffsets+index##INDEX));\
GATHER(_##INDEX, _##INDEX##g, (const char *) data);\
- SCATTER(_##INDEX, _##INDEX##s, (char *) dst);\
+ SCATTER(_##INDEX##s, _##INDEX, (char *) dst);\
for (uint32_t i = 0; i < elemNum(_##INDEX); ++i)\
GBE_ASSERT(data[gatherOffsets[index##INDEX+i] / sizeof(uint32_t)] ==\
dst[scatterOffsets[index##INDEX+i] / sizeof(uint32_t)]);