From 188f7c7e78c41b42beac4e7bc59970dcae29c8d1 Mon Sep 17 00:00:00 2001 From: Yang Rong Date: Thu, 27 Jun 2013 16:47:56 +0800 Subject: [PATCH] Add all atomic built-in functions. Treat all atomic function's operands as unsigned int, except imin/imax. So use the different function __gen_ocl_atomic_umin and __gen_ocl_atomic_imin. Overload different address space, local and global. Signed-off-by: Yang Rong Reviewed-by: Zhigang Gong --- backend/src/llvm/llvm_gen_backend.cpp | 76 ++++++++++++++++++++++ backend/src/llvm/llvm_gen_ocl_function.hxx | 28 ++++++++ backend/src/ocl_stdlib.h | 100 ++++++++++++++++++++++++++++- 3 files changed, 203 insertions(+), 1 deletion(-) diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index e6c0765..c06183e 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -534,6 +534,8 @@ namespace gbe // Emit unary instructions from gen native function void emitUnaryCallInst(CallInst &I, CallSite &CS, ir::Opcode opcode); + // Emit atomic instructions from gen native function + void emitAtomicInst(CallInst &I, CallSite &CS, ir::AtomicOps opcode); // These instructions are not supported at all void visitVAArgInst(VAArgInst &I) {NOT_SUPPORTED;} @@ -693,10 +695,12 @@ namespace gbe return doIt(uint64_t(0)); } } + // NULL pointers if(isa<ConstantPointerNull>(CPV)) { return doIt(uint32_t(0)); } + // Floats and doubles const Type::TypeID typeID = CPV->getType()->getTypeID(); switch (typeID) { @@ -1697,6 +1701,32 @@ namespace gbe case GEN_OCL_GET_IMAGE_CHANNEL_DATA_TYPE: case GEN_OCL_GET_IMAGE_CHANNEL_ORDER: case GEN_OCL_GET_IMAGE_DEPTH: + case GEN_OCL_ATOMIC_ADD0: + case GEN_OCL_ATOMIC_ADD1: + case GEN_OCL_ATOMIC_SUB0: + case GEN_OCL_ATOMIC_SUB1: + case GEN_OCL_ATOMIC_AND0: + case GEN_OCL_ATOMIC_AND1: + case GEN_OCL_ATOMIC_OR0: + case GEN_OCL_ATOMIC_OR1: + case GEN_OCL_ATOMIC_XOR0: + case GEN_OCL_ATOMIC_XOR1: + case GEN_OCL_ATOMIC_XCHG0: + case GEN_OCL_ATOMIC_XCHG1: + case GEN_OCL_ATOMIC_UMAX0: + 
case GEN_OCL_ATOMIC_UMAX1: + case GEN_OCL_ATOMIC_UMIN0: + case GEN_OCL_ATOMIC_UMIN1: + case GEN_OCL_ATOMIC_IMAX0: + case GEN_OCL_ATOMIC_IMAX1: + case GEN_OCL_ATOMIC_IMIN0: + case GEN_OCL_ATOMIC_IMIN1: + case GEN_OCL_ATOMIC_INC0: + case GEN_OCL_ATOMIC_INC1: + case GEN_OCL_ATOMIC_DEC0: + case GEN_OCL_ATOMIC_DEC1: + case GEN_OCL_ATOMIC_CMPXCHG0: + case GEN_OCL_ATOMIC_CMPXCHG1: // No structure can be returned this->newRegister(&I); break; @@ -1781,6 +1811,26 @@ namespace gbe ctx.ALU1(opcode, ir::TYPE_FLOAT, dst, src); } + // Emit an ATOMIC instruction: the address space is read from the first operand and every + // call operand becomes a source register. AE bounds that loop, so it exists in all builds. + void GenWriter::emitAtomicInst(CallInst &I, CallSite &CS, ir::AtomicOps opcode) { + CallSite::arg_iterator AI = CS.arg_begin(); + CallSite::arg_iterator AE = CS.arg_end(); + GBE_ASSERT(AI != AE); + unsigned int llvmSpace = (*AI)->getType()->getPointerAddressSpace(); + const ir::AddressSpace addrSpace = addressSpaceLLVMToGen(llvmSpace); + const ir::Register dst = this->getRegister(&I); + + vector<ir::Register> src; + uint32_t srcNum = 0; + while(AI != AE) { + src.push_back(this->getRegister(*(AI++))); + srcNum++; + } + const ir::Tuple srcTuple = ctx.arrayTuple(&src[0], srcNum); + ctx.ATOMIC(opcode, dst, addrSpace, srcTuple); + } + void GenWriter::emitCallInst(CallInst &I) { if (Function *F = I.getCalledFunction()) { if (F->getIntrinsicID() != 0) { @@ -1862,6 +1912,32 @@ namespace gbe case GEN_OCL_LBARRIER: ctx.SYNC(ir::syncLocalBarrier); break; case GEN_OCL_GBARRIER: ctx.SYNC(ir::syncGlobalBarrier); break; case GEN_OCL_LGBARRIER: ctx.SYNC(ir::syncLocalBarrier | ir::syncGlobalBarrier); break; + case GEN_OCL_ATOMIC_ADD0: + case GEN_OCL_ATOMIC_ADD1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_ADD); break; + case GEN_OCL_ATOMIC_SUB0: + case GEN_OCL_ATOMIC_SUB1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_SUB); break; + case GEN_OCL_ATOMIC_AND0: + case GEN_OCL_ATOMIC_AND1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_AND); break; + case GEN_OCL_ATOMIC_OR0: + case GEN_OCL_ATOMIC_OR1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_OR); break; + case 
GEN_OCL_ATOMIC_XOR0: + case GEN_OCL_ATOMIC_XOR1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_XOR); break; + case GEN_OCL_ATOMIC_XCHG0: + case GEN_OCL_ATOMIC_XCHG1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_XCHG); break; + case GEN_OCL_ATOMIC_INC0: + case GEN_OCL_ATOMIC_INC1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_INC); break; + case GEN_OCL_ATOMIC_DEC0: + case GEN_OCL_ATOMIC_DEC1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_DEC); break; + case GEN_OCL_ATOMIC_UMIN0: + case GEN_OCL_ATOMIC_UMIN1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_UMIN); break; + case GEN_OCL_ATOMIC_UMAX0: + case GEN_OCL_ATOMIC_UMAX1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_UMAX); break; + case GEN_OCL_ATOMIC_IMIN0: + case GEN_OCL_ATOMIC_IMIN1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_IMIN); break; + case GEN_OCL_ATOMIC_IMAX0: + case GEN_OCL_ATOMIC_IMAX1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_IMAX); break; + case GEN_OCL_ATOMIC_CMPXCHG0: + case GEN_OCL_ATOMIC_CMPXCHG1: this->emitAtomicInst(I,CS,ir::ATOMIC_OP_CMPXCHG); break; case GEN_OCL_GET_IMAGE_WIDTH: case GEN_OCL_GET_IMAGE_HEIGHT: case GEN_OCL_GET_IMAGE_DEPTH: diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index fe19844..128c677 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -78,6 +78,34 @@ DECL_LLVM_GEN_FUNCTION(GET_IMAGE_DEPTH, __gen_ocl_get_image_depth) DECL_LLVM_GEN_FUNCTION(GET_IMAGE_CHANNEL_DATA_TYPE, __gen_ocl_get_image_channel_data_type) DECL_LLVM_GEN_FUNCTION(GET_IMAGE_CHANNEL_ORDER, __gen_ocl_get_image_channel_order) +// atomic related functions: suffix 0 names the PU3AS1 (address space 1) overload, suffix 1 the PU3AS3 (address space 3) overload. 
+DECL_LLVM_GEN_FUNCTION(ATOMIC_ADD0, _Z20__gen_ocl_atomic_addPU3AS1jj) +DECL_LLVM_GEN_FUNCTION(ATOMIC_ADD1, _Z20__gen_ocl_atomic_addPU3AS3jj) +DECL_LLVM_GEN_FUNCTION(ATOMIC_SUB0, _Z20__gen_ocl_atomic_subPU3AS1jj) +DECL_LLVM_GEN_FUNCTION(ATOMIC_SUB1, _Z20__gen_ocl_atomic_subPU3AS3jj) +DECL_LLVM_GEN_FUNCTION(ATOMIC_AND0, _Z20__gen_ocl_atomic_andPU3AS1jj) +DECL_LLVM_GEN_FUNCTION(ATOMIC_AND1, _Z20__gen_ocl_atomic_andPU3AS3jj) +DECL_LLVM_GEN_FUNCTION(ATOMIC_OR0, _Z19__gen_ocl_atomic_orPU3AS1jj) +DECL_LLVM_GEN_FUNCTION(ATOMIC_OR1, _Z19__gen_ocl_atomic_orPU3AS3jj) +DECL_LLVM_GEN_FUNCTION(ATOMIC_XOR0, _Z20__gen_ocl_atomic_xorPU3AS1jj) +DECL_LLVM_GEN_FUNCTION(ATOMIC_XOR1, _Z20__gen_ocl_atomic_xorPU3AS3jj) +DECL_LLVM_GEN_FUNCTION(ATOMIC_UMIN0, _Z21__gen_ocl_atomic_uminPU3AS1jj) +DECL_LLVM_GEN_FUNCTION(ATOMIC_UMIN1, _Z21__gen_ocl_atomic_uminPU3AS3jj) +DECL_LLVM_GEN_FUNCTION(ATOMIC_UMAX0, _Z21__gen_ocl_atomic_umaxPU3AS1jj) +DECL_LLVM_GEN_FUNCTION(ATOMIC_UMAX1, _Z21__gen_ocl_atomic_umaxPU3AS3jj) +DECL_LLVM_GEN_FUNCTION(ATOMIC_IMIN0, _Z21__gen_ocl_atomic_iminPU3AS1jj) +DECL_LLVM_GEN_FUNCTION(ATOMIC_IMIN1, _Z21__gen_ocl_atomic_iminPU3AS3jj) +DECL_LLVM_GEN_FUNCTION(ATOMIC_IMAX0, _Z21__gen_ocl_atomic_imaxPU3AS1jj) +DECL_LLVM_GEN_FUNCTION(ATOMIC_IMAX1, _Z21__gen_ocl_atomic_imaxPU3AS3jj) +DECL_LLVM_GEN_FUNCTION(ATOMIC_XCHG0, _Z21__gen_ocl_atomic_xchgPU3AS1jj) +DECL_LLVM_GEN_FUNCTION(ATOMIC_XCHG1, _Z21__gen_ocl_atomic_xchgPU3AS3jj) +DECL_LLVM_GEN_FUNCTION(ATOMIC_INC0, _Z20__gen_ocl_atomic_incPU3AS1j) +DECL_LLVM_GEN_FUNCTION(ATOMIC_INC1, _Z20__gen_ocl_atomic_incPU3AS3j) +DECL_LLVM_GEN_FUNCTION(ATOMIC_DEC0, _Z20__gen_ocl_atomic_decPU3AS1j) +DECL_LLVM_GEN_FUNCTION(ATOMIC_DEC1, _Z20__gen_ocl_atomic_decPU3AS3j) +DECL_LLVM_GEN_FUNCTION(ATOMIC_CMPXCHG0, _Z24__gen_ocl_atomic_cmpxchgPU3AS1jjj) +DECL_LLVM_GEN_FUNCTION(ATOMIC_CMPXCHG1, _Z24__gen_ocl_atomic_cmpxchgPU3AS3jjj) + // saturation related functions. 
DECL_LLVM_GEN_FUNCTION(SADD_SAT_CHAR, _Z12ocl_sadd_satcc) DECL_LLVM_GEN_FUNCTION(SADD_SAT_SHORT, _Z12ocl_sadd_satss) diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h index 0bb9a67..148ac4d 100644 --- a/backend/src/ocl_stdlib.h +++ b/backend/src/ocl_stdlib.h @@ -1,4 +1,4 @@ -/* +/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or @@ -5129,6 +5129,112 @@ INLINE void write_mem_fence(cl_mem_fence_flags flags) { } ///////////////////////////////////////////////////////////////////////////// +// Atomic functions +///////////////////////////////////////////////////////////////////////////// +OVERLOADABLE uint __gen_ocl_atomic_add(__global uint *p, uint val); +OVERLOADABLE uint __gen_ocl_atomic_add(__local uint *p, uint val); +OVERLOADABLE uint __gen_ocl_atomic_sub(__global uint *p, uint val); +OVERLOADABLE uint __gen_ocl_atomic_sub(__local uint *p, uint val); +OVERLOADABLE uint __gen_ocl_atomic_and(__global uint *p, uint val); +OVERLOADABLE uint __gen_ocl_atomic_and(__local uint *p, uint val); +OVERLOADABLE uint __gen_ocl_atomic_or(__global uint *p, uint val); +OVERLOADABLE uint __gen_ocl_atomic_or(__local uint *p, uint val); +OVERLOADABLE uint __gen_ocl_atomic_xor(__global uint *p, uint val); +OVERLOADABLE uint __gen_ocl_atomic_xor(__local uint *p, uint val); +OVERLOADABLE uint __gen_ocl_atomic_xchg(__global uint *p, uint val); +OVERLOADABLE uint __gen_ocl_atomic_xchg(__local uint *p, uint val); +OVERLOADABLE uint __gen_ocl_atomic_inc(__global uint *p); +OVERLOADABLE uint __gen_ocl_atomic_inc(__local uint *p); +OVERLOADABLE uint __gen_ocl_atomic_dec(__global uint *p); +OVERLOADABLE uint __gen_ocl_atomic_dec(__local uint *p); +OVERLOADABLE uint __gen_ocl_atomic_cmpxchg(__global uint *p, uint cmp, uint val); +OVERLOADABLE uint __gen_ocl_atomic_cmpxchg(__local uint *p, uint cmp, uint val); +OVERLOADABLE uint __gen_ocl_atomic_imin(__global uint *p, uint val); +OVERLOADABLE uint 
__gen_ocl_atomic_imin(__local uint *p, uint val); +OVERLOADABLE uint __gen_ocl_atomic_imax(__global uint *p, uint val); +OVERLOADABLE uint __gen_ocl_atomic_imax(__local uint *p, uint val); +OVERLOADABLE uint __gen_ocl_atomic_umin(__global uint *p, uint val); +OVERLOADABLE uint __gen_ocl_atomic_umin(__local uint *p, uint val); +OVERLOADABLE uint __gen_ocl_atomic_umax(__global uint *p, uint val); +OVERLOADABLE uint __gen_ocl_atomic_umax(__local uint *p, uint val); + +// atomic_<op>(p, val): forward to the uint-typed __gen_ocl_ built-in declared above. +// The operand and the result are reinterpreted through a union, so a float operand +// (atomic_xchg) keeps its bit pattern instead of being value-converted to uint and back. +#define DECL_ATOMIC_OP_SPACE(NAME, TYPE, SPACE, PREFIX) \ + INLINE_OVERLOADABLE TYPE atomic_##NAME (volatile SPACE TYPE *p, TYPE val) { \ + union { TYPE t; uint u; } arg, res; \ + arg.t = val; \ + res.u = __gen_ocl_##PREFIX##NAME((SPACE uint *)p, arg.u); \ + return res.t; \ + } + +#define DECL_ATOMIC_OP_TYPE(NAME, TYPE, PREFIX) \ + DECL_ATOMIC_OP_SPACE(NAME, TYPE, __global, PREFIX) \ + DECL_ATOMIC_OP_SPACE(NAME, TYPE, __local, PREFIX) + +#define DECL_ATOMIC_OP(NAME) \ + DECL_ATOMIC_OP_TYPE(NAME, uint, atomic_) \ + DECL_ATOMIC_OP_TYPE(NAME, int, atomic_) + +DECL_ATOMIC_OP(add) +DECL_ATOMIC_OP(sub) +DECL_ATOMIC_OP(and) +DECL_ATOMIC_OP(or) +DECL_ATOMIC_OP(xor) +DECL_ATOMIC_OP(xchg) +DECL_ATOMIC_OP_TYPE(xchg, float, atomic_) +DECL_ATOMIC_OP_TYPE(min, int, atomic_i) +DECL_ATOMIC_OP_TYPE(max, int, atomic_i) +DECL_ATOMIC_OP_TYPE(min, uint, atomic_u) +DECL_ATOMIC_OP_TYPE(max, uint, atomic_u) + +#undef DECL_ATOMIC_OP +#undef DECL_ATOMIC_OP_TYPE +#undef DECL_ATOMIC_OP_SPACE + +// atomic_inc / atomic_dec: same forwarding scheme, but without a value operand. +#define DECL_ATOMIC_OP_SPACE(NAME, TYPE, SPACE) \ + INLINE_OVERLOADABLE TYPE atomic_##NAME (volatile SPACE TYPE *p) { \ + return (TYPE)__gen_ocl_atomic_##NAME((SPACE uint *)p); \ + } + +#define DECL_ATOMIC_OP_TYPE(NAME, TYPE) \ + DECL_ATOMIC_OP_SPACE(NAME, TYPE, __global) \ + DECL_ATOMIC_OP_SPACE(NAME, TYPE, __local) + +#define DECL_ATOMIC_OP(NAME) \ + DECL_ATOMIC_OP_TYPE(NAME, uint) \ + DECL_ATOMIC_OP_TYPE(NAME, int) + +DECL_ATOMIC_OP(inc) +DECL_ATOMIC_OP(dec) + +#undef DECL_ATOMIC_OP +#undef DECL_ATOMIC_OP_TYPE +#undef DECL_ATOMIC_OP_SPACE + +// atomic_cmpxchg: the compare value is passed first, then the new value. +#define DECL_ATOMIC_OP_SPACE(NAME, TYPE, SPACE) \ + 
INLINE_OVERLOADABLE TYPE atomic_##NAME (volatile SPACE TYPE *p, TYPE cmp, TYPE val) { \ + return (TYPE)__gen_ocl_atomic_##NAME((SPACE uint *)p, (uint)cmp, (uint)val); \ + } + +#define DECL_ATOMIC_OP_TYPE(NAME, TYPE) \ + DECL_ATOMIC_OP_SPACE(NAME, TYPE, __global) \ + DECL_ATOMIC_OP_SPACE(NAME, TYPE, __local) + +#define DECL_ATOMIC_OP(NAME) \ + DECL_ATOMIC_OP_TYPE(NAME, uint) \ + DECL_ATOMIC_OP_TYPE(NAME, int) + +DECL_ATOMIC_OP(cmpxchg) + +#undef DECL_ATOMIC_OP +#undef DECL_ATOMIC_OP_TYPE +#undef DECL_ATOMIC_OP_SPACE + +///////////////////////////////////////////////////////////////////////////// // Force the compilation to SIMD8 or SIMD16 ///////////////////////////////////////////////////////////////////////////// -- 2.7.4