From 1f854302ef8fb59cd5b4a5921e86dd84799ea3b0 Mon Sep 17 00:00:00 2001 From: Yang Rong Date: Thu, 27 Jun 2013 16:47:57 +0800 Subject: [PATCH] Add atomic test case. The test case include local memory and global memory, atomic operations from different threads and different work groups. Signed-off-by: Yang Rong Reviewed-by: Zhigang Gong --- kernels/compiler_atomic_functions.cl | 55 +++++++++++++++++------ utests/CMakeLists.txt | 1 + utests/compiler_atomic_functions.cpp | 87 ++++++++++++++++++++++++++++++++++-- 3 files changed, 127 insertions(+), 16 deletions(-) diff --git a/kernels/compiler_atomic_functions.cl b/kernels/compiler_atomic_functions.cl index 23f3e73..24f17c2 100644 --- a/kernels/compiler_atomic_functions.cl +++ b/kernels/compiler_atomic_functions.cl @@ -1,14 +1,43 @@ -/* test OpenCL 1.1 Atomic Functions (section 6.11.1, 9.4) */ -__kernel void compiler_atomic_functions(global int *a, global int *b) { - atomic_add(a, *b); - atomic_sub(a, *b); - atomic_xchg(a, *b); - atomic_inc(a); - atomic_dec(a); - atomic_cmpxchg(a, b, 100); - atomic_min(a, *b); - atomic_max(a, *b); - atomic_and(a, *b); - atomic_or(a, *b); - atomic_xor(a, *b); +__kernel void compiler_atomic_functions(__global int *dst, __local int *tmp, __global int *src) { + int lid = get_local_id(0); + int i = lid % 12; + atomic_xchg(&tmp[4], -1); + switch(i) { + case 0: atomic_inc(&tmp[i]); break; + case 1: atomic_dec(&tmp[i]); break; + case 2: atomic_add(&tmp[i], src[lid]); break; + case 3: atomic_sub(&tmp[i], src[lid]); break; + case 4: atomic_and(&tmp[i], ~(src[lid]<<(lid>>2))); break; + case 5: atomic_or (&tmp[i], src[lid]<<(lid>>2)); break; + case 6: atomic_xor(&tmp[i], src[lid]); break; + case 7: atomic_min(&tmp[i], -src[lid]); break; + case 8: atomic_max(&tmp[i], src[lid]); break; + case 9: atomic_min((__local unsigned int *)&tmp[i], -src[lid]); break; + case 10: atomic_max((__local unsigned int *)&tmp[i], src[lid]); break; + case 11: atomic_cmpxchg(&(tmp[i]), 0, src[10]); break; + default: break; + } + + switch(i) { + case 0: atomic_inc(&dst[i]); break; + case 1: atomic_dec(&dst[i]); break; + case 2: atomic_add(&dst[i], src[lid]); break; + case 3: atomic_sub(&dst[i], src[lid]); break; + case 4: atomic_and(&dst[i], ~(src[lid]<<(lid>>2))); break; + case 5: atomic_or (&dst[i], src[lid]<<(lid>>2)); break; + case 6: atomic_xor(&dst[i], src[lid]); break; + case 7: atomic_min(&dst[i], -src[lid]); break; + case 8: atomic_max(&dst[i], src[lid]); break; + case 9: atomic_min((__global unsigned int *)&dst[i], -src[lid]); break; + case 10: atomic_max((__global unsigned int *)&dst[i], src[lid]); break; + case 11: atomic_cmpxchg(&dst[i], 0, src[10]); break; + default: break; + } + + barrier(CLK_GLOBAL_MEM_FENCE); + + if(get_global_id(0) == 0) { + for(i=0; i<12; i=i+1) + atomic_add(&dst[i], tmp[i]); + } } diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index c0908ea..c115de3 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -73,6 +73,7 @@ set (utests_sources compiler_write_only_shorts.cpp compiler_switch.cpp compiler_math.cpp + compiler_atomic_functions.cpp compiler_insn_selection_min.cpp compiler_insn_selection_max.cpp compiler_insn_selection_masked_min_max.cpp diff --git a/utests/compiler_atomic_functions.cpp b/utests/compiler_atomic_functions.cpp index 20202da..71e8384 100644 --- a/utests/compiler_atomic_functions.cpp +++ b/utests/compiler_atomic_functions.cpp @@ -1,10 +1,91 @@ #include "utest_helper.hpp" +#include +#include -void compiler_atomic_functions(void) +#define GROUP_NUM 16 +#define LOCAL_SIZE 64 +static void cpu_compiler_atomic(int *dst, int *src) { - OCL_CREATE_KERNEL("compiler_atomic_functions"); + dst[4] = 0xffffffff; + int tmp[16] = { 0 }; + + for(int j=0; j>2)); break; + case 5: tmp[i] |= src[j]<<(j>>2); break; + case 6: tmp[i] ^= src[j]; break; + case 7: tmp[i] = tmp[i] < -src[j] ? tmp[i] : -src[j]; break; + case 8: tmp[i] = tmp[i] > src[j] ? tmp[i] : src[j]; break; + case 9: tmp[i] = (unsigned int)tmp[i] < (unsigned int)(-src[j]) ? tmp[i] : -src[j]; break; + case 10: tmp[i] = (unsigned int)tmp[i] > (unsigned int)(src[j]) ? tmp[i] : src[j]; break; + case 11: tmp[i] = src[10]; break; + default: break; + } + } + + for(int k=0; k>2)); break; + case 5: dst[i] |= src[j]<<(j>>2); break; + case 6: dst[i] ^= src[j]; break; + case 7: dst[i] = dst[i] < -src[j] ? dst[i] : -src[j]; break; + case 8: dst[i] = dst[i] > src[j] ? dst[i] : src[j]; break; + case 9: dst[i] = (unsigned int)dst[i] < (unsigned int)(-src[j]) ? dst[i] : -src[j]; break; + case 10: dst[i] = (unsigned int)dst[i] > (unsigned int)(src[j]) ? dst[i] : src[j]; break; + case 11: dst[i] = src[10]; break; + default: break; + } + } + } + + for(int i=0; i<12; i++) + dst[i] += tmp[i]; } -MAKE_UTEST_FROM_FUNCTION(compiler_atomic_functions); +static void compiler_atomic_functions(void) +{ + const size_t n = GROUP_NUM * LOCAL_SIZE; + int cpu_dst[16] = {0}, cpu_src[256]; + globals[0] = n; + locals[0] = LOCAL_SIZE; + + // Setup kernel and buffers + OCL_CREATE_KERNEL("compiler_atomic_functions"); + OCL_CREATE_BUFFER(buf[0], 0, 16 * sizeof(int), NULL); + OCL_CREATE_BUFFER(buf[1], 0, locals[0] * sizeof(int), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, 16 * sizeof(int), NULL); + OCL_SET_ARG(2, sizeof(cl_mem), &buf[1]); + + OCL_MAP_BUFFER(1); + for (uint32_t i = 0; i < locals[0]; ++i) + cpu_src[i] = ((int*)buf_data[1])[i] = rand() & 0xff; + cpu_compiler_atomic(cpu_dst, cpu_src); + OCL_UNMAP_BUFFER(1); + OCL_NDRANGE(1); + + OCL_MAP_BUFFER(0); + + // Check results + for(int i=0; i<12; i++) { + //printf("The dst(%d) gpu(0x%x) cpu(0x%x)\n", i, ((uint32_t *)buf_data[0])[i], cpu_dst[i]); + OCL_ASSERT(((int *)buf_data[0])[i] == cpu_dst[i]); + } + OCL_UNMAP_BUFFER(0); +} +MAKE_UTEST_FROM_FUNCTION(compiler_atomic_functions) -- 2.7.4