From: Ruiling Song Date: Thu, 18 Sep 2014 06:42:01 +0000 (+0800) Subject: GBE/libocl: Add __gen_ocl_get_timestamp() to get timestamp. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c0ba37d62dcac92adfc309e73abd7e12a02d8498;p=contrib%2Fbeignet.git GBE/libocl: Add __gen_ocl_get_timestamp() to get timestamp. Gen provide tm0 register for intra-kernel profiling. Here we provide an API __gen_ocl_get_timestamp() to return the timestamp in TM. The return type is defined as: struct time_stamp { ulong tick; uint event; }; 'tick' is a 64bit time tick. 'event' stores a value which means whether a tmEvent has occured (non-zero) or not (0). tmEvent includes time-impacting event such as context switch or frequency change since last time tm0 was read. I add a sample in the kernels/compiler_time_stamp.cl. Hope it would help you understand how to use it. V2: Introduce ir::ARFRegister to avoid directly use of nr/subnr in Gen IR. Rename __gen_ocl_extract_reg to __gen_ocl_region. Rename beignet_get_time_stamp to __gen_ocl_get_timestamp. Signed-off-by: Ruiling Song Reviewed-by: Zhigang Gong --- diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c index c120b60..266b501 100644 --- a/backend/src/backend/gen/gen_mesa_disasm.c +++ b/backend/src/backend/gen/gen_mesa_disasm.c @@ -552,6 +552,9 @@ static int reg (FILE *file, uint32_t _reg_file, uint32_t _reg_nr) string (file, "ip"); return -1; break; + case GEN_ARF_TM: + format (file, "tm%d", _reg_nr & 0x0f); + break; default: format (file, "ARF%d", _reg_nr); break; diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 2550567..175878d 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -202,6 +202,7 @@ namespace gbe const GenRegister src = ra->genReg(insn.src(0)); switch (insn.opcode) { case SEL_OP_MOV: p->MOV(dst, src, insn.extra.function); break; + case SEL_OP_READ_ARF: p->MOV(dst, src); break; case SEL_OP_FBH: p->FBH(dst, src); break; case SEL_OP_FBL: p->FBL(dst, src); break; case SEL_OP_NOT: p->NOT(dst, src); break; diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp index f0da50a..19aad95 100644 --- a/backend/src/backend/gen_defs.hpp +++ b/backend/src/backend/gen_defs.hpp @@ -261,6 +261,7 @@ enum GenMessageTarget { #define GEN_ARF_CONTROL 0x80 #define GEN_ARF_NOTIFICATION_COUNT 0x90 #define GEN_ARF_IP 0xA0 +#define GEN_ARF_TM 0xC0 #define GEN_MRF_COMPR4 (1 << 7) diff --git a/backend/src/backend/gen_insn_scheduling.cpp b/backend/src/backend/gen_insn_scheduling.cpp index 106d608..ead3e26 100644 --- a/backend/src/backend/gen_insn_scheduling.cpp +++ b/backend/src/backend/gen_insn_scheduling.cpp @@ -190,6 +190,10 @@ namespace gbe static const uint32_t MAX_FLAG_REGISTER = 8u; /*! Maximum number of *physical* accumulators registers */ static const uint32_t MAX_ACC_REGISTER = 1u; + /*! Maximum number of *physical* tm registers */ + static const uint32_t MAX_TM_REGISTER = 1u; + /*! Maximum number of *physical* arf registers */ + static const uint32_t MAX_ARF_REGISTER = MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_TM_REGISTER; /*! Stores the last node that wrote to a register / memory ... */ vector nodes; /*! store nodes each node depends on */ @@ -237,12 +241,12 @@ namespace gbe { if (scheduler.policy == PRE_ALLOC) { this->grfNum = selection.getRegNum(); - nodes.resize(grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_MEM_SYSTEM); + nodes.resize(grfNum + MAX_ARF_REGISTER + MAX_MEM_SYSTEM); } else { const uint32_t simdWidth = scheduler.ctx.getSimdWidth(); GBE_ASSERT(simdWidth == 8 || simdWidth == 16); this->grfNum = simdWidth == 8 ? 128 : 64; - nodes.resize(grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_MEM_SYSTEM); + nodes.resize(grfNum + MAX_ARF_REGISTER + MAX_MEM_SYSTEM); } insnNodes.resize(selection.getLargestBlockSize()); } @@ -327,6 +331,8 @@ namespace gbe } else if (file == GEN_ARF_ACCUMULATOR) { GBE_ASSERT(nr < MAX_ACC_REGISTER); return grfNum + MAX_FLAG_REGISTER + nr; + } else if (file == GEN_ARF_TM) { + return grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER; } else { NOT_SUPPORTED; return 0; @@ -348,7 +354,7 @@ namespace gbe } uint32_t DependencyTracker::getIndex(uint32_t bti) const { - const uint32_t memDelta = grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER; + const uint32_t memDelta = grfNum + MAX_ARF_REGISTER; return bti == 0xfe ? memDelta + LOCAL_MEMORY : (bti == 0xff ? memDelta + SCRATCH_MEMORY : memDelta + GLOBAL_MEMORY); } @@ -583,6 +589,7 @@ namespace gbe ScheduleDAGNode *node = tracker.insnNodes[insnID]; if (node->insn.isBranch() || node->insn.isLabel() || node->insn.opcode == SEL_OP_EOT || node->insn.opcode == SEL_OP_IF + || node->insn.opcode == SEL_OP_READ_ARF || node->insn.opcode == SEL_OP_BARRIER) tracker.makeBarrier(insnID, insnNum); } diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index d631579..f284ae1 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -458,6 +458,7 @@ namespace gbe #define I64Shift(OP) \ INLINE void OP(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) { I64Shift(SEL_OP_##OP, dst, src0, src1, tmp); } ALU1(MOV) + ALU1(READ_ARF) ALU1WithTemp(MOV_DF) ALU1WithTemp(LOAD_DF_IMM) ALU1(LOAD_INT64_IMM) @@ -3979,6 +3980,70 @@ namespace gbe DECL_CTOR(GetImageInfoInstruction, 1, 1); }; + class ReadARFInstructionPattern : public SelectionPattern + { + public: + ReadARFInstructionPattern(void) : SelectionPattern(1,1) { + this->opcodes.push_back(ir::OP_READ_ARF); + } + + INLINE uint32_t getRegNum(ir::ARFRegister arf) const { + if (arf == ir::ARF_TM) { + return 0xc0; + } else { + GBE_ASSERT(0); + return 0; + } + } + + INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const { + using namespace ir; + const ir::ReadARFInstruction &insn = cast(dag.insn); + GenRegister dst; + dst = sel.selReg(insn.getDst(0), insn.getType()); + + sel.push(); + sel.curr.predicate = GEN_PREDICATE_NONE; + sel.curr.noMask = 1; + sel.curr.execWidth = 8; + sel.READ_ARF(dst, GenRegister(GEN_ARCHITECTURE_REGISTER_FILE, + getRegNum(insn.getARFRegister()), + 0, + getGenType(insn.getType()), + GEN_VERTICAL_STRIDE_8, + GEN_WIDTH_8, + GEN_HORIZONTAL_STRIDE_1)); + sel.pop(); + return true; + } + }; + + /*! Get a region of a register */ + class RegionInstructionPattern : public SelectionPattern + { + public: + RegionInstructionPattern(void) : SelectionPattern(1,1) { + this->opcodes.push_back(ir::OP_REGION); + } + INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const { + using namespace ir; + const ir::RegionInstruction &insn = cast(dag.insn); + GenRegister dst, src; + dst = sel.selReg(insn.getDst(0), ir::TYPE_U32); + src = GenRegister::ud1grf(insn.getSrc(0)); + src.subphysical = 1; + src = GenRegister::offset(src, 0, insn.getOffset()*4); + + sel.push(); + sel.curr.noMask = 1; + sel.curr.predicate = GEN_PREDICATE_NONE; + sel.MOV(dst, src); + sel.pop(); + markAllChildren(dag); + return true; + } + }; + /*! Branch instruction pattern */ class BranchInstructionPattern : public SelectionPattern { @@ -4190,6 +4255,8 @@ namespace gbe this->insert(); this->insert(); this->insert(); + this->insert(); + this->insert(); // Sort all the patterns with the number of instructions they output for (uint32_t op = 0; op < ir::OP_INVALID; ++op) diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx index 2d70982..048a844 100644 --- a/backend/src/backend/gen_insn_selection.hxx +++ b/backend/src/backend/gen_insn_selection.hxx @@ -85,3 +85,4 @@ DECL_SELECTION_IR(BRD, UnaryInstruction) DECL_SELECTION_IR(IF, UnaryInstruction) DECL_SELECTION_IR(ENDIF, UnaryInstruction) DECL_SELECTION_IR(ELSE, UnaryInstruction) +DECL_SELECTION_IR(READ_ARF, UnaryInstruction) diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index 370fb87..2d86480 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -668,6 +668,48 @@ namespace ir { Register dst[0], src[0]; }; + class ALIGNED_INSTRUCTION ReadARFInstruction : + public BasePolicy, + public NSrcPolicy, + public NDstPolicy + { + public: + INLINE ReadARFInstruction(Type type, Register dst, ARFRegister arf) { + this->type = type; + this->dst[0] = dst; + this->opcode = OP_READ_ARF; + this->arf = arf; + } + INLINE ir::ARFRegister getARFRegister(void) const { return this->arf; } + INLINE Type getType(void) const { return this->type; } + INLINE bool wellFormed(const Function &fn, std::string &why) const; + INLINE void out(std::ostream &out, const Function &fn) const; + Type type; + ARFRegister arf; + Register dst[1]; + Register src[0]; + }; + + class ALIGNED_INSTRUCTION RegionInstruction : + public BasePolicy, + public NSrcPolicy, + public NDstPolicy + { + public: + INLINE RegionInstruction(Register dst, Register src, uint32_t offset) { + this->offset = offset; + this->dst[0] = dst; + this->src[0] = src; + this->opcode = OP_REGION; + } + INLINE uint32_t getOffset(void) const { return this->offset; } + INLINE bool wellFormed(const Function &fn, std::string &why) const; + INLINE void out(std::ostream &out, const Function &fn) const; + uint32_t offset; + Register dst[1]; + Register src[1]; + }; + class ALIGNED_INSTRUCTION LabelInstruction : public BasePolicy, public NSrcPolicy, @@ -1022,6 +1064,30 @@ namespace ir { return true; } + INLINE bool ReadARFInstruction::wellFormed(const Function &fn, std::string &whyNot) const + { + if (UNLIKELY( this->type != TYPE_U32 && this->type != TYPE_S32)) { + whyNot = "Only support S32/U32 type"; + return false; + } + + const RegisterFamily family = getFamily(this->type); + if (UNLIKELY(checkRegisterData(family, dst[0], fn, whyNot) == false)) + return false; + + return true; + } + + INLINE bool RegionInstruction::wellFormed(const Function &fn, std::string &whyNot) const + { + if (UNLIKELY(checkRegisterData(FAMILY_DWORD, src[0], fn, whyNot) == false)) + return false; + if (UNLIKELY(checkRegisterData(FAMILY_DWORD, dst[0], fn, whyNot) == false)) + return false; + + return true; + } + // Only a label index is required INLINE bool LabelInstruction::wellFormed(const Function &fn, std::string &whyNot) const { @@ -1138,6 +1204,16 @@ namespace ir { out << ": " << (int)bti.bti[i]; } + INLINE void ReadARFInstruction::out(std::ostream &out, const Function &fn) const { + this->outOpcode(out); + out << " %" << this->getDst(fn, 0) << " arf:" << arf; + } + + INLINE void RegionInstruction::out(std::ostream &out, const Function &fn) const { + this->outOpcode(out); + out << " %" << this->getDst(fn, 0) << " %" << this->getSrc(fn, 0) << " offset: " << this->offset; + } + INLINE void LabelInstruction::out(std::ostream &out, const Function &fn) const { this->outOpcode(out); out << " $" << labelIndex; @@ -1287,6 +1363,14 @@ START_INTROSPECTION(SyncInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(SyncInstruction) +START_INTROSPECTION(ReadARFInstruction) +#include "ir/instruction.hxx" +END_INTROSPECTION(ReadARFInstruction) + +START_INTROSPECTION(RegionInstruction) +#include "ir/instruction.hxx" +END_INTROSPECTION(RegionInstruction) + START_INTROSPECTION(LabelInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(LabelInstruction) @@ -1471,6 +1555,9 @@ DECL_MEM_FN(BranchInstruction, bool, isPredicated(void), isPredicated()) DECL_MEM_FN(BranchInstruction, bool, getInversePredicated(void), getInversePredicated()) DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex()) DECL_MEM_FN(SyncInstruction, uint32_t, getParameters(void), getParameters()) +DECL_MEM_FN(ReadARFInstruction, Type, getType(void), getType()) +DECL_MEM_FN(ReadARFInstruction, ARFRegister, getARFRegister(void), getARFRegister()) +DECL_MEM_FN(RegionInstruction, uint32_t, getOffset(void), getOffset()) DECL_MEM_FN(SampleInstruction, Type, getSrcType(void), getSrcType()) DECL_MEM_FN(SampleInstruction, Type, getDstType(void), getDstType()) DECL_MEM_FN(SampleInstruction, uint8_t, getSamplerIndex(void), getSamplerIndex()) @@ -1667,6 +1754,13 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex return internal::SyncInstruction(parameters).convert(); } + Instruction READ_ARF(Type type, Register dst, ARFRegister arf) { + return internal::ReadARFInstruction(type, dst, arf).convert(); + } + Instruction REGION(Register dst, Register src, uint32_t offset) { + return internal::RegionInstruction(dst, src, offset).convert(); + } + // LABEL Instruction LABEL(LabelIndex labelIndex) { return internal::LabelInstruction(labelIndex).convert(); diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 39fb2db..3526a41 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -496,6 +496,23 @@ namespace ir { static bool isClassOf(const Instruction &insn); }; + /*! Read one register (8 DWORD) in arf */ + class ReadARFInstruction : public Instruction { + public: + Type getType() const; + ir::ARFRegister getARFRegister() const; + /*! Return true if the given instruction is an instance of this class */ + static bool isClassOf(const Instruction &insn); + }; + + /*! return a region of a register, make sure the offset does not exceed the register size */ + class RegionInstruction : public Instruction { + public: + uint32_t getOffset(void) const; + /*! Return true if the given instruction is an instance of this class */ + static bool isClassOf(const Instruction &insn); + }; + /*! Specialize the instruction. Also performs typechecking first based on the * opcode. Crashes if it fails */ @@ -680,6 +697,9 @@ namespace ir { Instruction LOADI(Type type, Register dst, ImmediateIndex value); /*! sync.params... (see Sync instruction) */ Instruction SYNC(uint32_t parameters); + + Instruction READ_ARF(Type type, Register dst, ARFRegister arf); + Instruction REGION(Register dst, Register src, uint32_t offset); /*! typed write */ Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, Type srcType, Type coordType); /*! sample textures */ diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index abc984f..40b5305 100644 --- a/backend/src/ir/instruction.hxx +++ b/backend/src/ir/instruction.hxx @@ -79,6 +79,8 @@ DECL_INSN(TYPED_WRITE, TypedWriteInstruction) DECL_INSN(SAMPLE, SampleInstruction) DECL_INSN(SYNC, SyncInstruction) DECL_INSN(LABEL, LabelInstruction) +DECL_INSN(READ_ARF, ReadARFInstruction) +DECL_INSN(REGION, RegionInstruction) DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction) DECL_INSN(MUL_HI, BinaryInstruction) DECL_INSN(I64_MUL_HI, BinaryInstruction) diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp index 2a0aa54..eaf6728 100644 --- a/backend/src/ir/liveness.cpp +++ b/backend/src/ir/liveness.cpp @@ -79,6 +79,7 @@ namespace ir { opCode != ir::OP_MUL_HI && opCode != ir::OP_HADD && opCode != ir::OP_RHADD && + opCode != ir::OP_READ_ARF && opCode != ir::OP_ADDSAT && (dstNum == 1 || insn.getOpcode() != ir::OP_LOAD) && !extentRegs->contains(reg) diff --git a/backend/src/ir/register.hpp b/backend/src/ir/register.hpp index 5995ba5..7e53e1a 100644 --- a/backend/src/ir/register.hpp +++ b/backend/src/ir/register.hpp @@ -63,6 +63,21 @@ namespace ir { return 0; } + enum ARFRegister { + ARF_NULL = 0, + ARF_ADDRESS, + ARF_ACCUMULATOR, + ARF_FLAG, + ARF_MASK, + ARF_MASK_STACK, + ARF_MASK_STACK_DEPTH, + ARF_STATE, + ARF_CONTROL, + ARF_NOTIFICATION_COUNT, + ARF_IP, + ARF_TM + }; + /*! A register can be either a byte, a word, a dword or a qword. We store this * value into a register data (which makes the register file) */ diff --git a/backend/src/libocl/include/ocl_misc.h b/backend/src/libocl/include/ocl_misc.h index 8bd1eb3..5aa1c42 100644 --- a/backend/src/libocl/include/ocl_misc.h +++ b/backend/src/libocl/include/ocl_misc.h @@ -136,4 +136,13 @@ DEF(ulong) short __gen_ocl_simd_any(short); short __gen_ocl_simd_all(short); +struct time_stamp { + // time tick + ulong tick; + // If context-switch or frequency change occurs since last read of tm, + // event will be non-zero, otherwise, it will be zero. + uint event; +}; + +struct time_stamp __gen_ocl_get_timestamp(void); #endif diff --git a/backend/src/libocl/src/ocl_misc.cl b/backend/src/libocl/src/ocl_misc.cl index 9b4f2d4..ee86f7d 100644 --- a/backend/src/libocl/src/ocl_misc.cl +++ b/backend/src/libocl/src/ocl_misc.cl @@ -216,3 +216,16 @@ DEF(ulong) #undef DEC8X #undef DEC16 #undef DEC16X + +uint __gen_ocl_read_tm(void); +uint __gen_ocl_region(ushort offset, uint data); + +struct time_stamp __gen_ocl_get_timestamp(void) { + struct time_stamp val; + + uint tm = __gen_ocl_read_tm(); + val.tick = ((ulong)__gen_ocl_region(1, tm) << 32) | __gen_ocl_region(0, tm); + val.event = __gen_ocl_region(2, tm); + + return val; +}; diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 918af24..39b441f 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -2646,6 +2646,8 @@ namespace gbe case GEN_OCL_CONV_F32_TO_F16: case GEN_OCL_SIMD_ANY: case GEN_OCL_SIMD_ALL: + case GEN_OCL_READ_TM: + case GEN_OCL_REGION: this->newRegister(&I); break; case GEN_OCL_PRINTF: @@ -2798,6 +2800,26 @@ namespace gbe ctx.ALU1(ir::OP_SIMD_ANY, ir::TYPE_S16, dst, src); break; } + case GEN_OCL_READ_TM: + { + const ir::Register dst = this->getRegister(&I); + ctx.READ_ARF(ir::TYPE_U32, dst, ir::ARF_TM); + break; + } + case GEN_OCL_REGION: + { + const ir::Register dst = this->getRegister(&I); + // offset must be immediate + GBE_ASSERT(AI != AE); Constant *CPV = dyn_cast(*AI); + assert(CPV); + const ir::Immediate &x = processConstantImm(CPV); + + AI++; + const ir::Register src = this->getRegister(*AI); + + ctx.REGION(dst, src, x.getIntegerValue()); + break; + } case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break; case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break; case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break; diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 05639a9..f508bcc 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -190,5 +190,8 @@ DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16) DECL_LLVM_GEN_FUNCTION(SIMD_ANY, __gen_ocl_simd_any) DECL_LLVM_GEN_FUNCTION(SIMD_ALL, __gen_ocl_simd_all) +DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm) +DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region) + // printf function DECL_LLVM_GEN_FUNCTION(PRINTF, __gen_ocl_printf) diff --git a/kernels/compiler_time_stamp.cl b/kernels/compiler_time_stamp.cl new file mode 100644 index 0000000..f66da58 --- /dev/null +++ b/kernels/compiler_time_stamp.cl @@ -0,0 +1,28 @@ +__kernel void +compiler_time_stamp(__global int *src, __global int *dst) +{ + int i; + int final[16]; + struct time_stamp t1, t2, t3; + t1 = __gen_ocl_get_timestamp(); + for (i = 0; i < 16; ++i) { + int array[16], j; + for (j = 0; j < 16; ++j) + array[j] = get_global_id(0); + for (j = 0; j < src[0]; ++j) + array[j] = 1+src[j]; + final[i] = array[i]; + if(i == 7) + t2 = __gen_ocl_get_timestamp(); + } + t3 = __gen_ocl_get_timestamp(); + // currently printf does not support long type. + // printf("tmEvt %d %d %d tmDiff %lu %lu\n", t3-t1, t2-t1); + + // time_stamp.event maybe not zero, then the time diff is not accurate, + // because a time event occurs before the time stamp. + printf("tmEvt %d %d %d tmDiff %u %u\n", t1.event, t2.event, t3.event, + (uint)(t3.tick-t1.tick), (uint)(t2.tick-t1.tick)); + + dst[get_global_id(0)] = final[get_global_id(0)]; +} diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index 2bd6be0..b45ecf9 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -172,6 +172,7 @@ set (utests_sources compiler_getelementptr_bitcast.cpp compiler_simd_any.cpp compiler_simd_all.cpp + compiler_time_stamp.cpp compiler_double_precision.cpp load_program_from_bin_file.cpp load_program_from_gen_bin.cpp diff --git a/utests/compiler_time_stamp.cpp b/utests/compiler_time_stamp.cpp new file mode 100644 index 0000000..4da5752 --- /dev/null +++ b/utests/compiler_time_stamp.cpp @@ -0,0 +1,52 @@ +#include "utest_helper.hpp" + +static void cpu(int global_id, int *src, int *dst) { + int i; + int final[16]; + for (i = 0; i < 16; ++i) { + int array[16], j; + for (j = 0; j < 16; ++j) + array[j] = global_id; + for (j = 0; j < src[0]; ++j) + array[j] = 1+src[j]; + final[i] = array[i]; + } + dst[global_id] = final[global_id]; +} + +void compiler_time_stamp(void) +{ + const size_t n = 16; + int cpu_dst[16], cpu_src[16]; + + // Setup kernel and buffers + OCL_CREATE_KERNEL("compiler_time_stamp"); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + globals[0] = 16; + locals[0] = 16; + + // Run random tests + for (uint32_t pass = 0; pass < 1; ++pass) { + OCL_MAP_BUFFER(0); + for (int32_t i = 0; i < (int32_t) n; ++i) + cpu_src[i] = ((int32_t*)buf_data[0])[i] = rand() % 16; + OCL_UNMAP_BUFFER(0); + + // Run the kernel on GPU + OCL_NDRANGE(1); + + // Run on CPU + for (int32_t i = 0; i <(int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); + + // Compare + OCL_MAP_BUFFER(1); + for (int32_t i = 0; i < 11; ++i) + OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst[i]); + OCL_UNMAP_BUFFER(1); + } +} + +MAKE_UTEST_FROM_FUNCTION(compiler_time_stamp);