From 5d6d9428ec799aa90a076e5ab7a109b8b06df3ad Mon Sep 17 00:00:00 2001 From: Junyan He Date: Thu, 4 Jul 2013 15:35:34 +0800 Subject: [PATCH] Add the test case for builtin step() function The step function has two kind of prototype: gentypen step(gentypen edge, gentypen x) and gentypen step(float edge, gentypen x) The first's test name is compiler_step_floatX The second's test name is compiler_stepf_floatX Signed-off-by: Junyan He Reviewed-by: Song, Ruiling --- kernels/compiler_step.cl | 38 ++++++ utests/CMakeLists.txt | 1 + utests/compiler_step.cpp | 338 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 377 insertions(+) create mode 100644 kernels/compiler_step.cl create mode 100644 utests/compiler_step.cpp diff --git a/kernels/compiler_step.cl b/kernels/compiler_step.cl new file mode 100644 index 0000000..ef77f05 --- /dev/null +++ b/kernels/compiler_step.cl @@ -0,0 +1,38 @@ +#define COMPILER_STEP_FUNC_N(TYPE, N) \ + kernel void compiler_step_##TYPE##N ( \ + global TYPE##N* edge, global TYPE##N* x, global TYPE##N* dst) { \ + int i = get_global_id(0); \ + dst[i] = step(edge[i], x[i]); \ + } + +kernel void compiler_step_float (global float* edge, + global float* x, global float* dst) +{ + int i = get_global_id(0); + dst[i] = step(edge[i], x[i]); +} + +COMPILER_STEP_FUNC_N(float, 2) +COMPILER_STEP_FUNC_N(float, 3) +COMPILER_STEP_FUNC_N(float, 4) +COMPILER_STEP_FUNC_N(float, 8) +COMPILER_STEP_FUNC_N(float, 16) + +#define COMPILER_STEPF_FUNC_N(TYPE, N) \ + kernel void compiler_stepf_##TYPE##N ( \ + float edge, global TYPE##N* x, global TYPE##N* dst) { \ + int i = get_global_id(0); \ + dst[i] = step(edge, x[i]); \ + } + +kernel void compiler_stepf_float (float edge, global float* x, global float* dst) +{ + int i = get_global_id(0); + dst[i] = step(edge, x[i]); +} + +COMPILER_STEPF_FUNC_N(float, 2) +COMPILER_STEPF_FUNC_N(float, 3) +COMPILER_STEPF_FUNC_N(float, 4) +COMPILER_STEPF_FUNC_N(float, 8) +COMPILER_STEPF_FUNC_N(float, 16) diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index 0863071..82e0a40 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -30,6 +30,7 @@ set (utests_sources compiler_copy_image.cpp compiler_copy_image_3d.cpp compiler_copy_buffer_row.cpp + compiler_step.cpp compiler_fabs.cpp compiler_abs.cpp compiler_abs_diff.cpp diff --git a/utests/compiler_step.cpp b/utests/compiler_step.cpp new file mode 100644 index 0000000..3285dda --- /dev/null +++ b/utests/compiler_step.cpp @@ -0,0 +1,338 @@ +#include "utest_helper.hpp" +#include "string.h" + +template +struct cl_vec { + T ptr[((N+1)/2)*2]; //align to 2 elements. + + typedef cl_vec vec_type; + + cl_vec(void) { + memset(ptr, 0, sizeof(T) * ((N+1)/2)*2); + } + cl_vec(vec_type & other) { + memset(ptr, 0, sizeof(T) * ((N+1)/2)*2); + memcpy (this->ptr, other.ptr, sizeof(T) * N); + } + + vec_type& operator= (vec_type & other) { + memset(ptr, 0, sizeof(T) * ((N+1)/2)*2); + memcpy (this->ptr, other.ptr, sizeof(T) * N); + return *this; + } + + template vec_type& operator= (cl_vec & other) { + memset(ptr, 0, sizeof(T) * ((N+1)/2)*2); + memcpy (this->ptr, other.ptr, sizeof(T) * N); + return *this; + } + + bool operator== (vec_type & other) { + return !memcmp (this->ptr, other.ptr, sizeof(T) * N); + } + + void step (vec_type & other) { + int i = 0; + for (; i < N; i++) { + T a = ptr[i]; + T edge = other.ptr[i]; + T f = a < edge ? 0.0 : 1.0; + ptr[i] = f; + } + } + + void step (float & edge) { + int i = 0; + for (; i < N; i++) { + T a = ptr[i]; + T f = a < edge ? 0.0 : 1.0; + ptr[i] = f; + } + } +}; + +template static void cpu (int global_id, + cl_vec *edge, cl_vec *src, cl_vec *dst) +{ + cl_vec v = src[global_id]; + v.step(edge[global_id]); + dst[global_id] = v; +} + +template static void cpu(int global_id, T *edge, T *src, U *dst) +{ + T f = src[global_id]; + T e = edge[global_id]; + f = f < e ? 0.0 : 1.0; + dst[global_id] = (U)f; +} + +template static void cpu (int global_id, + float edge, cl_vec *src, cl_vec *dst) +{ + cl_vec v = src[global_id]; + v.step(edge); + dst[global_id] = v; +} + +template static void cpu(int global_id, float edge, T *src, U *dst) +{ + T f = src[global_id]; + f = f < edge ? 0.0 : 1.0; + dst[global_id] = (U)f; +} + +template static void gen_rand_val (cl_vec& vect) +{ + int i = 0; + + memset(vect.ptr, 0, sizeof(T) * ((N+1)/2)*2); + for (; i < N; i++) { + vect.ptr[i] = static_cast(.1f * (rand() & 15) - .75f); + } +} + +template static void gen_rand_val (T & val) +{ + val = static_cast(.1f * (rand() & 15) - .75f); +} + +template +inline static void print_data (T& val) +{ + if (std::is_unsigned::value) + printf(" %u", val); + else + printf(" %d", val); +} + +inline static void print_data (float& val) +{ + printf(" %f", val); +} + +template static void dump_data (cl_vec* edge, + cl_vec* src, cl_vec* dst, int n) +{ + U* val = reinterpret_cast(dst); + + n = n*((N+1)/2)*2; + + printf("\nEdge: \n"); + for (int32_t i = 0; i < (int32_t) n; ++i) { + print_data(((T *)buf_data[0])[i]); + } + printf("\nx: \n"); + for (int32_t i = 0; i < (int32_t) n; ++i) { + print_data(((T *)buf_data[1])[i]); + } + + printf("\nCPU: \n"); + for (int32_t i = 0; i < (int32_t) n; ++i) { + print_data(val[i]); + } + printf("\nGPU: \n"); + for (int32_t i = 0; i < (int32_t) n; ++i) { + print_data(((U *)buf_data[2])[i]); + } +} + +template static void dump_data (T* edge, T* src, U* dst, int n) +{ + printf("\nedge: \n"); + for (int32_t i = 0; i < (int32_t) n; ++i) { + print_data(((T *)buf_data[0])[i]); + } + + printf("\nx: \n"); + for (int32_t i = 0; i < (int32_t) n; ++i) { + print_data(((T *)buf_data[1])[i]); + } + + printf("\nCPU: \n"); + for (int32_t i = 0; i < (int32_t) n; ++i) { + print_data(dst[i]); + } + printf("\nGPU: \n"); + for (int32_t i = 0; i < (int32_t) n; ++i) { + print_data(((U *)buf_data[2])[i]); + } +} + +template static void dump_data (float edge, + cl_vec* src, cl_vec* dst, int n) +{ + U* val = reinterpret_cast(dst); + + n = n*((N+1)/2)*2; + + printf("\nEdge: %f\n", edge); + printf("\nx: \n"); + for (int32_t i = 0; i < (int32_t) n; ++i) { + print_data(((T *)buf_data[0])[i]); + } + + printf("\nCPU: \n"); + for (int32_t i = 0; i < (int32_t) n; ++i) { + print_data(val[i]); + } + printf("\nGPU: \n"); + for (int32_t i = 0; i < (int32_t) n; ++i) { + print_data(((U *)buf_data[1])[i]); + } +} + +template static void dump_data (float edge, T* src, U* dst, int n) +{ + printf("\nedge: %f\n", edge); + printf("\nx: \n"); + for (int32_t i = 0; i < (int32_t) n; ++i) { + print_data(((T *)buf_data[0])[i]); + } + + printf("\nCPU: \n"); + for (int32_t i = 0; i < (int32_t) n; ++i) { + print_data(dst[i]); + } + printf("\nGPU: \n"); + for (int32_t i = 0; i < (int32_t) n; ++i) { + print_data(((U *)buf_data[1])[i]); + } +} + +template static void compiler_step_with_type(void) +{ + const size_t n = 16; + T cpu_dst[n], cpu_src[n]; + T edge[n]; + + // Setup buffers + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL); + OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(T), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); + globals[0] = n; + locals[0] = n; + + // Run random tests + for (uint32_t pass = 0; pass < 8; ++pass) { + OCL_MAP_BUFFER(0); + OCL_MAP_BUFFER(1); + + /* Clear the dst buffer to avoid random data. */ + OCL_MAP_BUFFER(2); + memset(buf_data[2], 0, sizeof(T) * n); + OCL_UNMAP_BUFFER(2); + + for (int32_t i = 0; i < (int32_t) n; ++i) { + gen_rand_val(cpu_src[i]); + gen_rand_val(edge[i]); + } + + memcpy(buf_data[1], cpu_src, sizeof(T) * n); + memcpy(buf_data[0], edge, sizeof(T) * n); + + // Run the kernel on GPU + OCL_NDRANGE(1); + + // Run on CPU + for (int32_t i = 0; i < (int32_t) n; ++i) + cpu(i, edge, cpu_src, cpu_dst); + + // Compare + OCL_MAP_BUFFER(2); + + //dump_data(edge, cpu_src, cpu_dst, n); + + OCL_ASSERT(!memcmp(buf_data[2], cpu_dst, sizeof(T) * n)); + OCL_UNMAP_BUFFER(2); + OCL_UNMAP_BUFFER(1); + OCL_UNMAP_BUFFER(0); + } +} + +#define STEP_TEST_TYPE(TYPE) \ + static void compiler_step_##TYPE (void) \ + { \ + OCL_CALL (cl_kernel_init, "compiler_step.cl", "compiler_step_"#TYPE, SOURCE, NULL); \ + compiler_step_with_type(); \ + } \ + MAKE_UTEST_FROM_FUNCTION(compiler_step_##TYPE); + +typedef cl_vec float2; +typedef cl_vec float3; +typedef cl_vec float4; +typedef cl_vec float8; +typedef cl_vec float16; +STEP_TEST_TYPE(float) +STEP_TEST_TYPE(float2) +STEP_TEST_TYPE(float3) +STEP_TEST_TYPE(float4) +STEP_TEST_TYPE(float8) +STEP_TEST_TYPE(float16) + + +template static void compiler_stepf_with_type(void) +{ + const size_t n = 16; + T cpu_dst[n], cpu_src[n]; + float edge = (float)(.1f * (rand() & 15) - .75f); + + // Setup buffers + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL); + OCL_SET_ARG(0, sizeof(float), &edge); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(2, sizeof(cl_mem), &buf[1]); + globals[0] = n; + locals[0] = n; + + // Run random tests + for (uint32_t pass = 0; pass < 8; ++pass) { + OCL_MAP_BUFFER(0); + + /* Clear the dst buffer to avoid random data. */ + OCL_MAP_BUFFER(1); + memset(buf_data[1], 0, sizeof(T) * n); + OCL_UNMAP_BUFFER(1); + + for (int32_t i = 0; i < (int32_t) n; ++i) { + gen_rand_val(cpu_src[i]); + } + + memcpy(buf_data[0], cpu_src, sizeof(T) * n); + + // Run the kernel on GPU + OCL_NDRANGE(1); + + // Run on CPU + for (int32_t i = 0; i < (int32_t) n; ++i) + cpu(i, edge, cpu_src, cpu_dst); + + // Compare + OCL_MAP_BUFFER(1); + + //dump_data(edge, cpu_src, cpu_dst, n); + + OCL_ASSERT(!memcmp(buf_data[1], cpu_dst, sizeof(T) * n)); + OCL_UNMAP_BUFFER(1); + OCL_UNMAP_BUFFER(0); + } +} + +#define STEPF_TEST_TYPE(TYPE) \ + static void compiler_stepf_##TYPE (void) \ + { \ + OCL_CALL (cl_kernel_init, "compiler_step.cl", "compiler_stepf_"#TYPE, SOURCE, NULL); \ + compiler_stepf_with_type(); \ + } \ + MAKE_UTEST_FROM_FUNCTION(compiler_stepf_##TYPE); + +STEPF_TEST_TYPE(float) +STEPF_TEST_TYPE(float2) +STEPF_TEST_TYPE(float3) +STEPF_TEST_TYPE(float4) +STEPF_TEST_TYPE(float8) +STEPF_TEST_TYPE(float16) -- 2.7.4