From 4816fe819480c5bdd51c704fa3bae4d6c2eb4b03 Mon Sep 17 00:00:00 2001 From: Benjamin Segovia Date: Thu, 15 Mar 2012 14:15:33 +0000 Subject: [PATCH] Added proper bookkeeping for the argument types Finished the liveness pretty printer --- backend/kernels/add.cl | 3 +- backend/kernels/add2.cl | 3 +- backend/kernels/cmp.cl | 3 +- backend/kernels/cmp_cvt.cl | 3 +- backend/kernels/cycle.cl | 3 +- backend/kernels/extract.cl | 3 +- backend/kernels/function.cl | 3 +- backend/kernels/function_param.cl | 3 +- backend/kernels/get_global_id.cl | 3 +- backend/kernels/insert.cl | 3 +- backend/kernels/load_store.cl | 1 + backend/kernels/loop.cl | 8 +++- backend/kernels/loop.ll | 12 +++--- backend/kernels/mad.cl | 3 +- backend/kernels/select.cl | 3 +- backend/kernels/short.cl | 3 +- backend/kernels/shuffle.cl | 3 +- backend/kernels/simple_float4.cl | 3 +- backend/kernels/simple_float4_2.cl | 3 +- backend/kernels/simple_float4_3.cl | 3 +- backend/kernels/stdlib.h | 6 +++ backend/kernels/store.cl | 3 +- backend/kernels/struct.cl | 3 +- backend/kernels/struct2.cl | 3 +- backend/kernels/test_select.cl | 3 +- backend/kernels/undefined.cl | 3 +- backend/kernels/void.cl | 3 +- backend/src/ir/context.cpp | 5 ++- backend/src/ir/context.hpp | 6 +-- backend/src/ir/function.cpp | 17 +++++++- backend/src/ir/function.hpp | 28 +++++++++++-- backend/src/ir/instruction.cpp | 40 +++++++++--------- backend/src/ir/instruction.hpp | 14 +++---- backend/src/ir/liveness.cpp | 78 +++++++++++++++++++++++++---------- backend/src/llvm/llvm_gen_backend.cpp | 70 ++++++++++++++++++++++--------- 35 files changed, 246 insertions(+), 108 deletions(-) diff --git a/backend/kernels/add.cl b/backend/kernels/add.cl index e03781d..9285efd 100644 --- a/backend/kernels/add.cl +++ b/backend/kernels/add.cl @@ -1,6 +1,7 @@ -#include +#include "stdlib.h" __kernel unsigned int add(unsigned int x, unsigned int y) { return x + y; } + diff --git a/backend/kernels/add2.cl b/backend/kernels/add2.cl index 58a8d2f..3136994 100644 --- a/backend/kernels/add2.cl +++ b/backend/kernels/add2.cl @@ -1,4 +1,4 @@ -#include +#include "stdlib.h" struct big{ unsigned int a, b; }; @@ -11,3 +11,4 @@ __kernel struct big add(unsigned int x, unsigned int y) return p; } + diff --git a/backend/kernels/cmp.cl b/backend/kernels/cmp.cl index 789f852..fc5bd67 100644 --- a/backend/kernels/cmp.cl +++ b/backend/kernels/cmp.cl @@ -1,6 +1,7 @@ -#include +#include "stdlib.h" __kernel void test_cmp(__global bool *dst, int x, int y, float z, float w) { dst[0] = (x < y) + (z > w); } + diff --git a/backend/kernels/cmp_cvt.cl b/backend/kernels/cmp_cvt.cl index ab39fba..bb289ae 100644 --- a/backend/kernels/cmp_cvt.cl +++ b/backend/kernels/cmp_cvt.cl @@ -1,7 +1,8 @@ -#include +#include "stdlib.h" __kernel void cmp_cvt(__global int *dst, int x, int y) { dst[0] = x + y < get_local_id(0) ; } + diff --git a/backend/kernels/cycle.cl b/backend/kernels/cycle.cl index 8b0be54..3797bfd 100644 --- a/backend/kernels/cycle.cl +++ b/backend/kernels/cycle.cl @@ -1,4 +1,4 @@ -#include +#include "stdlib.h" __kernel void cycle(global int *dst) { int x, y; @@ -13,3 +13,4 @@ hop1: dst[0] = x; } + diff --git a/backend/kernels/extract.cl b/backend/kernels/extract.cl index fb8a5c5..a350575 100644 --- a/backend/kernels/extract.cl +++ b/backend/kernels/extract.cl @@ -1,7 +1,8 @@ -#include +#include "stdlib.h" __kernel void extract(__global int4 *dst, __global int4 *src, int c) { const int4 from = src[0]; dst[0] = (int4)(from.x, 1, 2, 3); } + diff --git a/backend/kernels/function.cl b/backend/kernels/function.cl index 0cc6873..2cd6ef2 100644 --- a/backend/kernels/function.cl +++ b/backend/kernels/function.cl @@ -1,4 +1,4 @@ -#include +#include "stdlib.h" void write(__global int *dst) { @@ -10,3 +10,4 @@ __kernel void write2(__global int *dst, int x) write(dst); dst[x] = 1; } + diff --git a/backend/kernels/function_param.cl b/backend/kernels/function_param.cl index 46a7fd0..2558f8c 100644 --- a/backend/kernels/function_param.cl +++ b/backend/kernels/function_param.cl @@ -1,4 +1,4 @@ -#include +#include "stdlib.h" struct struct0 { int hop[5]; @@ -12,3 +12,4 @@ __kernel void param(__global struct struct0 *dst, struct struct0 s, __local int dst[0].y += y; } + diff --git a/backend/kernels/get_global_id.cl b/backend/kernels/get_global_id.cl index 6c2b554..9053763 100644 --- a/backend/kernels/get_global_id.cl +++ b/backend/kernels/get_global_id.cl @@ -1,4 +1,4 @@ -#include +#include "stdlib.h" __kernel void test_global_id(__global int *dst, __global int *p) { @@ -7,3 +7,4 @@ __kernel void test_global_id(__global int *dst, __global int *p) p[get_global_id(0)] = get_local_id(0); } + diff --git a/backend/kernels/insert.cl b/backend/kernels/insert.cl index 6497c8c..429b54f 100644 --- a/backend/kernels/insert.cl +++ b/backend/kernels/insert.cl @@ -1,4 +1,4 @@ -#include +#include "stdlib.h" __kernel void insert(__global int4 *dst, __global int4 *src, int c) { @@ -7,3 +7,4 @@ __kernel void insert(__global int4 *dst, __global int4 *src, int c) dst[0] = src[0]; } + diff --git a/backend/kernels/load_store.cl b/backend/kernels/load_store.cl index f88e4cc..fdff43b 100644 --- a/backend/kernels/load_store.cl +++ b/backend/kernels/load_store.cl @@ -3,3 +3,4 @@ __kernel void load_store(__local int *dst, __local int *src) dst[0] = src[0]; } + diff --git a/backend/kernels/loop.cl b/backend/kernels/loop.cl index af64abd..552a066 100644 --- a/backend/kernels/loop.cl +++ b/backend/kernels/loop.cl @@ -1,6 +1,10 @@ -#include -__kernel void add(__global int *dst, unsigned int x) +#include "stdlib.h" + +struct big { int x[10]; }; + +__kernel void add(__global int *dst, unsigned int x, struct big b) { for (int i = 0; i < x; ++i) dst[i]++; } + diff --git a/backend/kernels/loop.ll b/backend/kernels/loop.ll index c67faf7..9d33968 100644 --- a/backend/kernels/loop.ll +++ b/backend/kernels/loop.ll @@ -2,17 +2,19 @@ target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64" target triple = "ptx32--" -define ptx_kernel void @add(i32* nocapture %dst, i32 %x) nounwind noinline { +%struct.big = type { [10 x i32] } + +define ptx_kernel void @add(i32 addrspace(1)* nocapture %dst, i32 %x, %struct.big* nocapture byval %b) nounwind noinline { entry: %cmp2 = icmp eq i32 %x, 0 br i1 %cmp2, label %for.end, label %for.body for.body: ; preds = %for.body, %entry %i.03 = phi i32 [ %inc1, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32* %dst, i32 %i.03 - %0 = load i32* %arrayidx, align 4, !tbaa !1 + %arrayidx = getelementptr inbounds i32 addrspace(1)* %dst, i32 %i.03 + %0 = load i32 addrspace(1)* %arrayidx, align 4, !tbaa !1 %inc = add nsw i32 %0, 1 - store i32 %inc, i32* %arrayidx, align 4, !tbaa !1 + store i32 %inc, i32 addrspace(1)* %arrayidx, align 4, !tbaa !1 %inc1 = add nsw i32 %i.03, 1 %exitcond = icmp eq i32 %inc1, %x br i1 %exitcond, label %for.end, label %for.body @@ -23,7 +25,7 @@ for.end: ; preds = %for.body, %entry !opencl.kernels = !{!0} -!0 = metadata !{void (i32*, i32)* @add} +!0 = metadata !{void (i32 addrspace(1)*, i32, %struct.big*)* @add} !1 = metadata !{metadata !"int", metadata !2} !2 = metadata !{metadata !"omnipotent char", metadata !3} !3 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/backend/kernels/mad.cl b/backend/kernels/mad.cl index 14c5987..5875a9b 100644 --- a/backend/kernels/mad.cl +++ b/backend/kernels/mad.cl @@ -1,4 +1,4 @@ -#include +#include "stdlib.h" __attribute__((pure, overloadable)) int mad(int,int,int); __attribute__((pure, overloadable)) float mad(float,float,float); __attribute__((pure, overloadable)) float4 mad(float4,float4,float4); @@ -15,3 +15,4 @@ __kernel void add(__global int *dst, unsigned int x, float z) } } + diff --git a/backend/kernels/select.cl b/backend/kernels/select.cl index 8f35915..0bf8141 100644 --- a/backend/kernels/select.cl +++ b/backend/kernels/select.cl @@ -1,4 +1,4 @@ -#include +#include "stdlib.h" __kernel void test_select(__global int4 *dst, __global int4 *src0, @@ -7,3 +7,4 @@ __kernel void test_select(__global int4 *dst, const int4 from = select(src0[0], src0[1], src0[1]); dst[0] = from; } + diff --git a/backend/kernels/short.cl b/backend/kernels/short.cl index e2d5b19..246cf02 100644 --- a/backend/kernels/short.cl +++ b/backend/kernels/short.cl @@ -1,6 +1,7 @@ -#include +#include "stdlib.h" __kernel void short_write(__global short *dst, short x, short y) { dst[0] = x + y; } + diff --git a/backend/kernels/shuffle.cl b/backend/kernels/shuffle.cl index b39ae85..45d144e 100644 --- a/backend/kernels/shuffle.cl +++ b/backend/kernels/shuffle.cl @@ -1,7 +1,8 @@ -#include +#include "stdlib.h" __kernel void shuffle(__global int4 *dst, __global int4 *src, int c) { const int4 from = src[0]; dst[0] = from.xywz; } + diff --git a/backend/kernels/simple_float4.cl b/backend/kernels/simple_float4.cl index d9dbe51..743ceea 100644 --- a/backend/kernels/simple_float4.cl +++ b/backend/kernels/simple_float4.cl @@ -1,7 +1,8 @@ -#include +#include "stdlib.h" __kernel void simple_float4(__global float4 *dst, __global float4 *src) { dst[get_global_id(0)] = src[get_global_id(0)]; } + diff --git a/backend/kernels/simple_float4_2.cl b/backend/kernels/simple_float4_2.cl index 6788511..c35d9bb 100644 --- a/backend/kernels/simple_float4_2.cl +++ b/backend/kernels/simple_float4_2.cl @@ -1,7 +1,8 @@ -#include +#include "stdlib.h" __kernel void simple_float4(__global float4 *dst, __global float4 *src) { dst[get_global_id(0)] = src[get_global_id(0)] * src[get_global_id(0)]; } + diff --git a/backend/kernels/simple_float4_3.cl b/backend/kernels/simple_float4_3.cl index 3d20f42..25c8fe4 100644 --- a/backend/kernels/simple_float4_3.cl +++ b/backend/kernels/simple_float4_3.cl @@ -1,4 +1,4 @@ -#include +#include "stdlib.h" __kernel void simple_float4(__global float4 *dst, __global float4 *src, bool b) { @@ -6,3 +6,4 @@ __kernel void simple_float4(__global float4 *dst, __global float4 *src, bool b) dst[get_global_id(0)] += (float4) (src[2].x, 1.f, 2.f, 3.f); } + diff --git a/backend/kernels/stdlib.h b/backend/kernels/stdlib.h index 8ee4b83..0701ff8 100644 --- a/backend/kernels/stdlib.h +++ b/backend/kernels/stdlib.h @@ -77,6 +77,12 @@ __attribute__((overloadable)) inline int4 select(int4 src0, int4 src1, int4 cond return dst; } +#define __private __attribute__((address_space(0))) #define __global __attribute__((address_space(1))) +#define __constant __attribute__((address_space(2))) +#define __local __attribute__((address_space(3))) #define global __global +#define local __local +#define constant __constant +#define private __private diff --git a/backend/kernels/store.cl b/backend/kernels/store.cl index 337ba02..5c47378 100644 --- a/backend/kernels/store.cl +++ b/backend/kernels/store.cl @@ -1,6 +1,7 @@ -#include +#include "stdlib.h" __kernel void store(__global int *dst, __local int *dst0, int x) { dst[0] = 1; } + diff --git a/backend/kernels/struct.cl b/backend/kernels/struct.cl index d72de6e..8be397d 100644 --- a/backend/kernels/struct.cl +++ b/backend/kernels/struct.cl @@ -1,4 +1,4 @@ -#include +#include "stdlib.h" struct my_struct { int a; int b[2]; @@ -23,3 +23,4 @@ __kernel void struct_cl (struct my_struct s, int x, __global int *mem, int y) mem[0] = s.a + array[x].a + array[x+1].b[0] + g[x] + g[3]; } + diff --git a/backend/kernels/struct2.cl b/backend/kernels/struct2.cl index adaace3..31269f4 100644 --- a/backend/kernels/struct2.cl +++ b/backend/kernels/struct2.cl @@ -1,4 +1,4 @@ -#include +#include "stdlib.h" struct my_struct { int a; int b[2]; @@ -19,3 +19,4 @@ __kernel void struct_cl (struct my_struct s, int x, __global struct my_struct *m mem[0] = hop; } + diff --git a/backend/kernels/test_select.cl b/backend/kernels/test_select.cl index 8676c0d..ff4284b 100644 --- a/backend/kernels/test_select.cl +++ b/backend/kernels/test_select.cl @@ -1,4 +1,4 @@ -#include +#include "stdlib.h" __kernel void test_select(__global int *dst, __global int *src) { @@ -8,3 +8,4 @@ __kernel void test_select(__global int *dst, __global int *src) dst[get_global_id(0)] = 2; } + diff --git a/backend/kernels/undefined.cl b/backend/kernels/undefined.cl index a1df672..b1e5294 100644 --- a/backend/kernels/undefined.cl +++ b/backend/kernels/undefined.cl @@ -1,4 +1,4 @@ -#include +#include "stdlib.h" __kernel void undefined(__global int *dst) { int x; @@ -8,3 +8,4 @@ __kernel void undefined(__global int *dst) dst[0] = 1; } + diff --git a/backend/kernels/void.cl b/backend/kernels/void.cl index a5a5331..fd9b4bd 100644 --- a/backend/kernels/void.cl +++ b/backend/kernels/void.cl @@ -1,3 +1,4 @@ -#include +#include "stdlib.h" __kernel void hop() {} + diff --git a/backend/src/ir/context.cpp b/backend/src/ir/context.cpp index 110a0a7..9885c39 100644 --- a/backend/src/ir/context.cpp +++ b/backend/src/ir/context.cpp @@ -80,10 +80,11 @@ namespace ir { return index; } - void Context::input(Register reg) { + void Context::input(FunctionInput::Type type, Register reg, uint32_t elementSize) { GBE_ASSERTM(fn != NULL, "No function currently defined"); GBE_ASSERTM(reg < fn->file.regNum(), "Out-of-bound register"); - fn->inputs.push_back(reg); + const FunctionInput input(type, reg, elementSize); + fn->inputs.push_back(input); } void Context::output(Register reg) { diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp index 23bc048..f66b20a 100644 --- a/backend/src/ir/context.hpp +++ b/backend/src/ir/context.hpp @@ -72,7 +72,7 @@ namespace ir { /*! Create a new label for the current function */ LabelIndex label(void); /*! Append a new input register for the function */ - void input(Register reg); + void input(FunctionInput::Type type, Register reg, uint32_t elemSz = 0u); /*! Append a new output register for the function */ void output(Register reg); /*! Get the immediate value */ @@ -119,7 +119,7 @@ namespace ir { /*! LOAD with the destinations directly specified */ template - void LOAD(Type type, Register offset, MemorySpace space, bool dwAligned, Args...values) + void LOAD(Type type, Register offset, AddressSpace space, bool dwAligned, Args...values) { const Tuple index = this->tuple(values...); const uint16_t valueNum = std::tuple_size>::value; @@ -129,7 +129,7 @@ namespace ir { /*! STORE with the sources directly specified */ template - void STORE(Type type, Register offset, MemorySpace space, bool dwAligned, Args...values) + void STORE(Type type, Register offset, AddressSpace space, bool dwAligned, Args...values) { const Tuple index = this->tuple(values...); const uint16_t valueNum = std::tuple_size>::value; diff --git a/backend/src/ir/function.cpp b/backend/src/ir/function.cpp index 8244ef4..e3e343e 100644 --- a/backend/src/ir/function.cpp +++ b/backend/src/ir/function.cpp @@ -99,8 +99,21 @@ namespace ir { out << fn.getRegisterFile(); out << "## " << fn.inputNum() << " input register" << plural(fn.inputNum()) << " ##" << std::endl; - for (uint32_t i = 0; i < fn.inputNum(); ++i) - out << "decl_input %" << fn.getInput(i) << std::endl; + for (uint32_t i = 0; i < fn.inputNum(); ++i) { + const FunctionInput &input = fn.getInput(i); + out << "decl_input."; + switch (input.type) { + case FunctionInput::GLOBAL_POINTER: out << "global"; break; + case FunctionInput::LOCAL_POINTER: out << "local"; break; + case FunctionInput::CONSTANT_POINTER: out << "constant"; break; + case FunctionInput::VALUE: out << "value"; break; + case FunctionInput::STRUCTURE: + out << "structure." << input.elementSize; + break; + default: break; + } + out << " %" << input.reg << std::endl; + } out << "## " << fn.outputNum() << " output register" << plural(fn.outputNum()) << " ##" << std::endl; for (uint32_t i = 0; i < fn.outputNum(); ++i) diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp index 76d39bd..a884047 100644 --- a/backend/src/ir/function.hpp +++ b/backend/src/ir/function.hpp @@ -87,6 +87,28 @@ namespace ir { GBE_CLASS(BasicBlock); }; + /*! In fine, function inputs (arguments) can be pushed from the constant + * buffer if they are structures. Other arguments can be images (textures) + * and will also require special treatment. + */ + struct FunctionInput + { + enum Type + { + GLOBAL_POINTER = 0, /* __global */ + CONSTANT_POINTER = 1, /* __constant */ + LOCAL_POINTER = 2, /* __local */ + VALUE = 3, /* int, float */ + STRUCTURE = 4 /* struct foo */ + }; + /*! Create a function input */ + INLINE FunctionInput(Type type, Register reg, uint32_t elementSize = 0u) : + type(type), reg(reg), elementSize(elementSize) {} + Type type; /*! Gives the type of argument we have */ + Register reg; /*! Holds the argument */ + uint32_t elementSize; /*! Only for structure arguments */ + }; + /*! A function is no more that a set of declared registers and a set of * basic blocks */ @@ -136,8 +158,8 @@ namespace ir { INLINE void deleteInstruction(Instruction *insn) { insnPool.deallocate(insn); } - /*! Get input register */ - INLINE Register getInput(uint32_t ID) const { + /*! Get input argument */ + INLINE const FunctionInput &getInput(uint32_t ID) const { GBE_ASSERT(ID < inputNum()); return inputs[ID]; } @@ -181,7 +203,7 @@ namespace ir { private: friend class Context; //!< Can freely modify a function std::string name; //!< Function name - vector inputs; //!< Input registers of the function + vector inputs; //!< Input registers of the function vector outputs; //!< Output registers of the function vector labels; //!< Each label points to a basic block vector immediates; //!< All immediate values in the function diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index a9adb5f..fd04d5b 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -306,7 +306,7 @@ namespace ir { LoadInstruction(Type type, Tuple dstValues, Register offset, - MemorySpace memSpace, + AddressSpace addrSpace, uint32_t valueNum, bool dwAligned) { @@ -315,7 +315,7 @@ namespace ir { this->type = type; this->offset = offset; this->values = dstValues; - this->memSpace = memSpace; + this->addrSpace = addrSpace; this->valueNum = valueNum; this->dwAligned = dwAligned ? 1 : 0; } @@ -331,13 +331,13 @@ namespace ir { INLINE uint32_t getDstNum(void) const { return valueNum; } INLINE Type getValueType(void) const { return type; } INLINE uint32_t getValueNum(void) const { return valueNum; } - INLINE MemorySpace getAddressSpace(void) const { return memSpace; } + INLINE AddressSpace getAddressSpace(void) const { return addrSpace; } INLINE bool wellFormed(const Function &fn, std::string &why) const; INLINE void out(std::ostream &out, const Function &fn) const; Type type; //!< Type to store Register offset; //!< First source is the offset where to store Tuple values; //!< Values to load - MemorySpace memSpace; //!< Where to load + AddressSpace addrSpace; //!< Where to load uint8_t valueNum:7; //!< Number of values to load uint8_t dwAligned:1; //!< DWORD aligned is what matters with GEN }; @@ -349,7 +349,7 @@ namespace ir { StoreInstruction(Type type, Tuple values, Register offset, - MemorySpace memSpace, + AddressSpace addrSpace, uint32_t valueNum, bool dwAligned) { @@ -358,7 +358,7 @@ namespace ir { this->type = type; this->offset = offset; this->values = values; - this->memSpace = memSpace; + this->addrSpace = addrSpace; this->valueNum = valueNum; this->dwAligned = dwAligned ? 1 : 0; } @@ -372,13 +372,13 @@ namespace ir { INLINE uint32_t getSrcNum(void) const { return valueNum + 1u; } INLINE uint32_t getValueNum(void) const { return valueNum; } INLINE Type getValueType(void) const { return type; } - INLINE MemorySpace getAddressSpace(void) const { return memSpace; } + INLINE AddressSpace getAddressSpace(void) const { return addrSpace; } INLINE bool wellFormed(const Function &fn, std::string &why) const; INLINE void out(std::ostream &out, const Function &fn) const; Type type; //!< Type to store Register offset; //!< First source is the offset where to store Tuple values; //!< Values to store - MemorySpace memSpace; //!< Where to store + AddressSpace addrSpace; //!< Where to store uint8_t valueNum:7; //!< Number of values to store uint8_t dwAligned:1; //!< DWORD aligned is what matters with GEN }; @@ -426,16 +426,16 @@ namespace ir { public BasePolicy, public NoSrcPolicy, public NoDstPolicy { public: - INLINE FenceInstruction(MemorySpace memSpace) { + INLINE FenceInstruction(AddressSpace addrSpace) { this->opcode = OP_FENCE; - this->memSpace = memSpace; + this->addrSpace = addrSpace; } bool wellFormed(const Function &fn, std::string &why) const; INLINE void out(std::ostream &out, const Function &fn) const { this->outOpcode(out); - out << "." << memSpace; + out << "." << addrSpace; } - MemorySpace memSpace; //!< The loads and stores to order + AddressSpace addrSpace; //!< The loads and stores to order }; class ALIGNED_INSTRUCTION LabelInstruction : @@ -680,7 +680,7 @@ namespace ir { INLINE void LoadInstruction::out(std::ostream &out, const Function &fn) const { this->outOpcode(out); - out << "." << type << "." << memSpace << (dwAligned ? "." : ".un") << "aligned"; + out << "." << type << "." << addrSpace << (dwAligned ? "." : ".un") << "aligned"; out << " {"; for (uint32_t i = 0; i < valueNum; ++i) out << "%" << this->getDstIndex(fn, i) << (i != (valueNum-1) ? " " : ""); @@ -690,7 +690,7 @@ namespace ir { INLINE void StoreInstruction::out(std::ostream &out, const Function &fn) const { this->outOpcode(out); - out << "." << type << "." << memSpace << (dwAligned ? "." : ".un") << "aligned"; + out << "." << type << "." << addrSpace << (dwAligned ? "." : ".un") << "aligned"; out << " %" << this->getSrcIndex(fn, 0) << " {"; for (uint32_t i = 0; i < valueNum; ++i) out << "%" << this->getSrcIndex(fn, i+1) << (i != (valueNum-1) ? " " : ""); @@ -718,8 +718,8 @@ namespace ir { } /* namespace internal */ - std::ostream &operator<< (std::ostream &out, MemorySpace memSpace) { - switch (memSpace) { + std::ostream &operator<< (std::ostream &out, AddressSpace addrSpace) { + switch (addrSpace) { case MEM_GLOBAL: return out << "global"; case MEM_LOCAL: return out << "local"; case MEM_CONSTANT: return out << "constant"; @@ -882,10 +882,10 @@ DECL_MEM_FN(ConvertInstruction, Type, getSrcType(void), getSrcType()) DECL_MEM_FN(ConvertInstruction, Type, getDstType(void), getDstType()) DECL_MEM_FN(StoreInstruction, Type, getValueType(void), getValueType()) DECL_MEM_FN(StoreInstruction, uint32_t, getValueNum(void), getValueNum()) -DECL_MEM_FN(StoreInstruction, MemorySpace, getAddressSpace(void), getAddressSpace()) +DECL_MEM_FN(StoreInstruction, AddressSpace, getAddressSpace(void), getAddressSpace()) DECL_MEM_FN(LoadInstruction, Type, getValueType(void), getValueType()) DECL_MEM_FN(LoadInstruction, uint32_t, getValueNum(void), getValueNum()) -DECL_MEM_FN(LoadInstruction, MemorySpace, getAddressSpace(void), getAddressSpace()) +DECL_MEM_FN(LoadInstruction, AddressSpace, getAddressSpace(void), getAddressSpace()) DECL_MEM_FN(LoadImmInstruction, Immediate, getImmediate(const Function &fn), getImmediate(fn)) DECL_MEM_FN(LoadImmInstruction, Type, getType(void), getType()) DECL_MEM_FN(LabelInstruction, LabelIndex, getLabelIndex(void), getLabelIndex()) @@ -999,7 +999,7 @@ DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex()) Instruction NAME(Type type, \ Tuple tuple, \ Register offset, \ - MemorySpace space, \ + AddressSpace space, \ uint32_t valueNum, \ bool dwAligned) \ { \ @@ -1013,7 +1013,7 @@ DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex()) #undef DECL_EMIT_FUNCTION // FENCE - Instruction FENCE(MemorySpace space) { + Instruction FENCE(AddressSpace space) { const internal::FenceInstruction insn(space); return insn.convert(); } diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 1f37311..4548bdc 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -42,7 +42,7 @@ namespace ir { }; /*! Different memory spaces */ - enum MemorySpace : uint8_t { + enum AddressSpace : uint8_t { MEM_GLOBAL = 0, //!< Global memory (a la OCL) MEM_LOCAL, //!< Local memory (thread group memory) MEM_CONSTANT, //!< Immutable global memory @@ -50,7 +50,7 @@ namespace ir { }; /*! Output the memory space */ - std::ostream &operator<< (std::ostream &out, MemorySpace memSpace); + std::ostream &operator<< (std::ostream &out, AddressSpace addrSpace); /*! A label is identified with an unsigned short */ TYPE_SAFE(LabelIndex, uint16_t) @@ -201,7 +201,7 @@ namespace ir { /*! Give the number of values the instruction is storing (srcNum-1) */ uint32_t getValueNum(void) const; /*! Address space that is manipulated here */ - MemorySpace getAddressSpace(void) const; + AddressSpace getAddressSpace(void) const; /*! DWORD aligned means untyped read for Gen. That is what matters */ bool isDWORDAligned(void) const; /*! Return true if the given instruction is an instance of this class */ @@ -219,7 +219,7 @@ namespace ir { /*! Number of values loaded (ie number of destinations) */ uint32_t getValueNum(void) const; /*! Address space that is manipulated here */ - MemorySpace getAddressSpace(void) const; + AddressSpace getAddressSpace(void) const; /*! DWORD aligned means untyped read for Gen. That is what matters */ bool isDWORDAligned(void) const; /*! Return true if the given instruction is an instance of this class */ @@ -393,11 +393,11 @@ namespace ir { /*! loadi.type dst value */ Instruction LOADI(Type type, Register dst, ImmediateIndex value); /*! load.type.space {dst1,...,dst_valueNum} offset value */ - Instruction LOAD(Type type, Tuple dst, Register offset, MemorySpace space, uint32_t valueNum, bool dwAligned); + Instruction LOAD(Type type, Tuple dst, Register offset, AddressSpace space, uint32_t valueNum, bool dwAligned); /*! store.type.space offset {src1,...,src_valueNum} value */ - Instruction STORE(Type type, Tuple src, Register offset, MemorySpace space, uint32_t valueNum, bool dwAligned); + Instruction STORE(Type type, Tuple src, Register offset, AddressSpace space, uint32_t valueNum, bool dwAligned); /*! fence.space */ - Instruction FENCE(MemorySpace space); + Instruction FENCE(AddressSpace space); /*! label labelIndex */ Instruction LABEL(LabelIndex labelIndex); /*! texture instruction TODO */ diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp index 5cf2377..30ea5e6 100644 --- a/backend/src/ir/liveness.cpp +++ b/backend/src/ir/liveness.cpp @@ -94,36 +94,58 @@ namespace ir { } } + /*! To pretty print the livfeness info */ static const uint32_t prettyInsnStrSize = 48; static const uint32_t prettyRegStrSize = 5; - enum RegisterUse - { - USE_NONE = 0, - USE_READ = 1, - USE_WRITTEN = 2 + /*! Describe how the register is used */ + static const uint32_t USE_NONE = 0; + static const uint32_t USE_READ = 1 << 0; + static const uint32_t USE_WRITTEN = 1 << 1; + + enum UsePosition { + POS_BEFORE = 0, + POS_HERE = 1, + POS_AFTER = 2 }; - /*! "next" includes the provided instruction */ - static INLINE RegisterUse nextUse(const Instruction &insn, Register reg) { + /*! Compute the use of a register in all direction in a block */ + template + static INLINE uint32_t usage(const Instruction &insn, Register reg) { const Function &fn = insn.getParent()->getParent(); const Instruction *curr = &insn; + uint32_t use = USE_NONE; + + // Skip the current element if you are looking forward or backward + if (curr && pos == POS_BEFORE) + curr = curr->getPredecessor(); + else if (curr && pos == POS_AFTER) + curr = curr->getSuccessor(); while (curr) { for (uint32_t srcID = 0; srcID < curr->getSrcNum(); ++srcID) { const Register src = curr->getSrcIndex(fn, srcID); - if (src == reg) return USE_READ; + if (src == reg) { + use |= USE_READ; + break; + } } for (uint32_t dstID = 0; dstID < curr->getDstNum(); ++dstID) { const Register dst = curr->getDstIndex(fn, dstID); - if (dst == reg) return USE_WRITTEN; + if (dst == reg) { + use |= USE_WRITTEN; + break; + } } - curr = curr->getSuccessor(); + if (use != USE_NONE) + break; + if (pos == POS_BEFORE) + curr = curr->getPredecessor(); + else if (pos == POS_AFTER) + curr = curr->getSuccessor(); + else + curr = NULL; } - return USE_NONE; - } - /*! "previous" does not include the provided instruction */ - static INLINE RegisterUse previousUse(const Instruction &insn, Register reg) { - return USE_NONE; + return use; } /*! Just print spaceNum spaces */ @@ -162,14 +184,28 @@ namespace ir { { for (uint32_t regID = 0; regID < fn.regNum(); ++regID) { const Register reg(regID); + // Use in that instruction means alive + if (usage(insn, reg) != USE_NONE) { + printAlive(out); + continue; + } // Non-killed and liveout == alive in the complete block - if (info.inLiveOut(reg) == true && info.inVarKill(reg) == false) + if (info.inLiveOut(reg) == true && info.inVarKill(reg) == false) { printAlive(out); - // We must look for the last use of the instruction - else if (info.inLiveOut(reg) == false) { - - } else - printDead(out); + continue; + } + // It is going to be read + const uint32_t nextUsage = usage(insn, reg); + if ((nextUsage & USE_READ) != USE_NONE) { + printAlive(out); + continue; + } + // It is not written and alive at the end of the block + if ((nextUsage & USE_WRITTEN) == USE_NONE && info.inLiveOut(reg) == true) { + printAlive(out); + continue; + } + printDead(out); } } out << std::endl; diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 74948dc..31b4dfc 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -154,6 +154,17 @@ namespace gbe return type; } + /*! OCL to Gen-IR address type */ + static INLINE ir::AddressSpace addressSpaceLLVMToGen(unsigned llvmMemSpace) { + switch (llvmMemSpace) { + case 0: return ir::MEM_PRIVATE; + case 1: return ir::MEM_GLOBAL; + case 2: return ir::MEM_CONSTANT; + case 4: return ir::MEM_LOCAL; + } + GBE_ASSERT(false); + return ir::MEM_GLOBAL; + } /*! Handle the LLVM IR Value to Gen IR register translation. This has 2 roles: * - Split the LLVM vector into several scalar values @@ -596,17 +607,46 @@ namespace gbe void GenWriter::emitFunctionPrototype(Function &F) { GBE_ASSERTM(F.hasStructRetAttr() == false, - "Returned value for kernel functions"); + "Returned value for kernel functions is forbidden"); // Loop over the arguments and output registers for them if (!F.arg_empty()) { Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); + const AttrListPtr &PAL = F.getAttributes(); // Insert a new register for each function argument - for (; I != E; ++I) { - const Type *type = I->getType(); + uint32_t argID = 1; // Start at one actually + for (; I != E; ++I, ++argID) { + Type *type = I->getType(); GBE_ASSERT(isScalarType(type) == true); const ir::Register reg = regTranslator.newScalar(I); - ctx.input(reg); + if (type->isPointerTy() == false) + ctx.input(ir::FunctionInput::VALUE, reg); + else { + PointerType *pointerType = dyn_cast(type); + // By value structure + if (PAL.paramHasAttr(argID, Attribute::ByVal)) { + Type *pointed = pointerType->getElementType(); + const size_t structSize = getTypeByteSize(unit, pointed); + ctx.input(ir::FunctionInput::STRUCTURE, reg, structSize); + } + // Regular user provided pointer (global, local or constant) + else { + const uint32_t addr = pointerType->getAddressSpace(); + const ir::AddressSpace addrSpace = addressSpaceLLVMToGen(addr); + switch (addrSpace) { + case ir::MEM_GLOBAL: + ctx.input(ir::FunctionInput::GLOBAL_POINTER, reg); + break; + case ir::MEM_LOCAL: + ctx.input(ir::FunctionInput::LOCAL_POINTER, reg); + break; + case ir::MEM_CONSTANT: + ctx.input(ir::FunctionInput::CONSTANT_POINTER, reg); + break; + default: GBE_ASSERT(addrSpace != ir::MEM_PRIVATE); + } + } + } } } @@ -614,7 +654,7 @@ namespace gbe // structure const Type *type = F.getReturnType(); GBE_ASSERTM(type->isVoidTy() == true, - "Returned value for kernel functions"); + "Returned value for kernel functions is forbidden"); #if GBE_DEBUG // Variable number of arguments is not supported @@ -1143,16 +1183,6 @@ namespace gbe NOT_SUPPORTED; } - static INLINE ir::MemorySpace addressSpaceLLVMToGen(unsigned llvmMemSpace) { - switch (llvmMemSpace) { - case 0: return ir::MEM_PRIVATE; - case 1: return ir::MEM_GLOBAL; - case 4: return ir::MEM_LOCAL; - } - GBE_ASSERT(false); - return ir::MEM_GLOBAL; - } - static INLINE Value *getLoadOrStoreValue(LoadInst &I) { return &I; } @@ -1173,7 +1203,7 @@ namespace gbe Value *llvmValues = getLoadOrStoreValue(I); Type *llvmType = llvmValues->getType(); const bool dwAligned = (I.getAlignment() % 4) == 0; - const ir::MemorySpace memSpace = addressSpaceLLVMToGen(llvmSpace); + const ir::AddressSpace addrSpace = addressSpaceLLVMToGen(llvmSpace); const ir::Register ptr = this->getRegister(llvmPtr); // Scalar is easy. We neednot build register tuples @@ -1181,9 +1211,9 @@ namespace gbe const ir::Type type = getType(ctx, llvmType); const ir::Register values = this->getRegister(llvmValues); if (isLoad) - ctx.LOAD(type, ptr, memSpace, dwAligned, values); + ctx.LOAD(type, ptr, addrSpace, dwAligned, values); else - ctx.STORE(type, ptr, memSpace, dwAligned, values); + ctx.STORE(type, ptr, addrSpace, dwAligned, values); } // A vector type requires to build a tuple else { @@ -1202,9 +1232,9 @@ namespace gbe // Emit the instruction const ir::Type type = getType(ctx, elemType); if (isLoad) - ctx.LOAD(type, tuple, ptr, memSpace, elemNum, dwAligned); + ctx.LOAD(type, tuple, ptr, addrSpace, elemNum, dwAligned); else - ctx.STORE(type, tuple, ptr, memSpace, elemNum, dwAligned); + ctx.STORE(type, tuple, ptr, addrSpace, elemNum, dwAligned); } } -- 2.7.4