From: Zhigang Gong
Date: Wed, 27 Aug 2014 02:33:42 +0000 (+0800)
Subject: Add new vload benchmark/test case.
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=44929d4847b7b5d0cf5929a3155b6193091c26b7;p=contrib%2Fbeignet.git

Add new vload benchmark/test case.

v2:
refine the benchmark case and don't mix it with normal unit test cases.

Signed-off-by: Zhigang Gong
Reviewed-by: "Song, Ruiling"
---
Reviewer notes (text in this section is commentary, not part of the commit):

* What the patch does: adds kernels/vload_bench.cl and utests/vload_bench.cpp,
  adds an isBenchMark flag to UTest plus UTest::runAllBenchMark(), wires a
  'b' option into utest_run, makes benchmark_run's 'n' option call
  runAllBenchMark(), and moves the clBuildProgram call in cl_kernel_init
  inside the preceding block.
* The exported copy of this patch had lost every "<...>" token. Restored:
  "#include <sys/time.h>" (gettimeofday / struct timeval are used),
  "template <typename T>", the "vload_bench<T>(...)" calls, and
  "new vector<UTest>".
* utests/utest.hpp, hunk -91,9: the new isBenchMark parameter is inserted
  before haveIssue, so the pre-existing positional call "#FN, true" would
  now set isBenchMark instead of haveIssue. It is changed to
  "#FN, false, true" to keep its old meaning. NOTE(review): the owning macro
  starts above the hunk; presumably it is the known-issue test macro --
  confirm against the full header.
* utests/vload_bench.cpp: the vectorSize[] loops are bounded by the element
  count, not sizeof(vectorSize) in bytes (identical today because the
  elements are uint8_t).
* Leading whitespace (and possibly runs of spaces inside string literals)
  was collapsed in the export and has been re-created here; apply with
  whitespace tolerance and verify literals against the tree.

diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
index d96a2e0..0a959c8 100644
--- a/benchmark/CMakeLists.txt
+++ b/benchmark/CMakeLists.txt
@@ -10,8 +10,13 @@ set (benchmark_sources
   ../utests/utest.cpp
   ../utests/utest_file_map.cpp
   ../utests/utest_helper.cpp
+  ../utests/vload_bench.cpp
   enqueue_copy_buf.cpp)
+
+SET(CMAKE_CXX_FLAGS "-DBUILD_BENCHMARK ${CMAKE_CXX_FLAGS}")
+SET(CMAKE_C_FLAGS "-DBUILD_BENCHMARK ${CMAKE_C_FLAGS}")
+

 ADD_LIBRARY(benchmarks SHARED ${ADDMATHFUNC} ${benchmark_sources})

 #TARGET_LINK_LIBRARIES(benchmarks cl m ${OPENGL_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
diff --git a/benchmark/benchmark_run.cpp b/benchmark/benchmark_run.cpp
index b29ccc3..e5c7057 100644
--- a/benchmark/benchmark_run.cpp
+++ b/benchmark/benchmark_run.cpp
@@ -98,7 +98,7 @@ int main(int argc, char *argv[])

       case 'n':
         try {
-          UTest::runAllNoIssue();
+          UTest::runAllBenchMark();
         }
         catch (Exception e){
           std::cout << " " << e.what() << " [SUCCESS]" << std::endl;
diff --git a/kernels/vload_bench.cl b/kernels/vload_bench.cl
new file mode 100644
index 0000000..c906c75
--- /dev/null
+++ b/kernels/vload_bench.cl
@@ -0,0 +1,33 @@
+#define VLOAD_BENCH(T, N, M) \
+__kernel void \
+vload_bench_##M ##T ##N(__global T* src, __global uint* dst, uint offset) \
+{ \
+  int id = (int)get_global_id(0); \
+  uint ##N srcV = 0; \
+  for(int i = 0; i < M; i++) \
+  { \
+    srcV += convert_uint ##N(vload ##N(id + (i & 0xFFFF), src + offset)); \
+  } \
+  vstore ##N(srcV, id, dst);\
+  /*if (id < 16)*/ \
+  /*printf("id %d %d %d\n", id, srcV.s0, srcV.s1);*/ \
+}
+
+#define VLOAD_BENCH_ALL_VECTOR(T, N_ITERATIONS) \
+  VLOAD_BENCH(T, 2, N_ITERATIONS) \
+  VLOAD_BENCH(T, 3, N_ITERATIONS) \
+  VLOAD_BENCH(T, 4, N_ITERATIONS) \
+  VLOAD_BENCH(T, 8, N_ITERATIONS) \
+  VLOAD_BENCH(T, 16, N_ITERATIONS)
+
+#define VLOAD_BENCH_ALL_TYPES(N_ITERATIONS) \
+  VLOAD_BENCH_ALL_VECTOR(uchar, N_ITERATIONS) \
+  VLOAD_BENCH_ALL_VECTOR(char, N_ITERATIONS) \
+  VLOAD_BENCH_ALL_VECTOR(ushort, N_ITERATIONS) \
+  VLOAD_BENCH_ALL_VECTOR(short, N_ITERATIONS) \
+  VLOAD_BENCH_ALL_VECTOR(uint, N_ITERATIONS) \
+  VLOAD_BENCH_ALL_VECTOR(int, N_ITERATIONS) \
+  VLOAD_BENCH_ALL_VECTOR(float, N_ITERATIONS)
+
+VLOAD_BENCH_ALL_TYPES(1)
+VLOAD_BENCH_ALL_TYPES(10000)
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 721e6f7..b30e6f9 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -184,6 +184,7 @@ set (utests_sources
   image_1D_buffer.cpp
   compare_image_2d_and_1d_array.cpp
   compiler_constant_expr.cpp
+  vload_bench.cpp
   utest_assert.cpp
   utest.cpp
   utest_file_map.cpp
diff --git a/utests/utest.cpp b/utests/utest.cpp
index b491cae..d06dedc 100644
--- a/utests/utest.cpp
+++ b/utests/utest.cpp
@@ -106,8 +106,8 @@ void catch_signal(void){
   }
 }

-UTest::UTest(Function fn, const char *name, bool haveIssue, bool needDestroyProgram)
-  : fn(fn), name(name), haveIssue(haveIssue), needDestroyProgram(needDestroyProgram) {
+UTest::UTest(Function fn, const char *name, bool isBenchMark, bool haveIssue, bool needDestroyProgram)
+  : fn(fn), name(name), isBenchMark(isBenchMark), haveIssue(haveIssue), needDestroyProgram(needDestroyProgram) {

   if (utestList == NULL) {
     utestList = new vector<UTest>;
@@ -165,7 +165,19 @@ void UTest::runAllNoIssue(void) {

   for (; retStatistics.finishrun < utestList->size(); ++retStatistics.finishrun) {
     const UTest &utest = (*utestList)[retStatistics.finishrun];
-    if (utest.fn == NULL || utest.haveIssue) continue;
+    if (utest.fn == NULL || utest.haveIssue || utest.isBenchMark) continue;
+    do_run(utest);
+    cl_kernel_destroy(utest.needDestroyProgram);
+    cl_buffer_destroy();
+  }
+}
+
+void UTest::runAllBenchMark(void) {
+  if (utestList == NULL) return;
+
+  for (; retStatistics.finishrun < utestList->size(); ++retStatistics.finishrun) {
+    const UTest &utest = (*utestList)[retStatistics.finishrun];
+    if (utest.fn == NULL || utest.haveIssue || !utest.isBenchMark) continue;
     do_run(utest);
     cl_kernel_destroy(utest.needDestroyProgram);
     cl_buffer_destroy();
diff --git a/utests/utest.hpp b/utests/utest.hpp
index 375ef70..26ce6f8 100644
--- a/utests/utest.hpp
+++ b/utests/utest.hpp
@@ -47,11 +47,13 @@ struct UTest
   /*! Empty test */
   UTest(void);
   /*! Build a new unit test and append it to the unit test list */
-  UTest(Function fn, const char *name, bool haveIssue = false, bool needDestroyProgram = true);
+  UTest(Function fn, const char *name, bool isBenchMark = false, bool haveIssue = false, bool needDestroyProgram = true);
   /*! Function to execute */
   Function fn;
   /*! Name of the test */
   const char *name;
+  /*! whether it is a bench mark. */
+  bool isBenchMark;
   /*! Indicate whether current test cases has issue to be fixes */
   bool haveIssue;
   /*! Indicate whether destroy kernels/program. */
@@ -62,6 +64,8 @@ struct UTest
   static void run(const char *name);
   /*! Run all the tests without known issue*/
   static void runAllNoIssue(void);
+  /*! Run all the benchmark. */
+  static void runAllBenchMark(void);
   /*! Run all the tests */
   static void runAll(void);
   /*! List all test cases */
@@ -77,7 +81,7 @@ struct UTest

 #define MAKE_UTEST_FROM_FUNCTION_KEEP_PROGRAM(FN, KEEP_PROGRAM) \
   static void __ANON__##FN##__(void) { UTEST_EXPECT_SUCCESS(FN()); } \
-  static const UTest __##FN##__(__ANON__##FN##__, #FN, false, !(KEEP_PROGRAM));
+  static const UTest __##FN##__(__ANON__##FN##__, #FN, false, false, !(KEEP_PROGRAM));


 /*! Turn a function into a unit test */
@@ -91,9 +95,14 @@ struct UTest
-  static const UTest __##FN##__(__ANON__##FN##__, #FN, true);
+  static const UTest __##FN##__(__ANON__##FN##__, #FN, false, true);

 /*! Turn a function into a unit performance test */
+#define MAKE_BENCHMARK_FROM_FUNCTION_KEEP_PROGRAM(FN, KEEP_PROGRAM) \
+  static void __ANON__##FN##__(void) { BENCHMARK(FN()); } \
+  static const UTest __##FN##__(__ANON__##FN##__, #FN, true, false, !(KEEP_PROGRAM));
+
 #define MAKE_BENCHMARK_FROM_FUNCTION(FN) \
   static void __ANON__##FN##__(void) { BENCHMARK(FN()); } \
-  static const UTest __##FN##__(__ANON__##FN##__, #FN);
+  static const UTest __##FN##__(__ANON__##FN##__, #FN, true);
+

 /*! No assert is expected */
 #define UTEST_EXPECT_SUCCESS(EXPR) \
@@ -125,14 +134,16 @@ struct UTest

 #define BENCHMARK(EXPR) \
  do { \
-    int ret = 0; \
+    int ret = 0;\
     try { \
       ret = EXPR; \
-      printf(" %s [SUCCESS] [Result: %d]\n", #EXPR, ret);\
+      std::cout << " [Result: " << ret << "] [SUCCESS]" << std::endl; \
+      UTest::retStatistics.passCount += 1; \
     } \
     catch (Exception e) { \
       std::cout << " " << #EXPR << " [FAILED]" << std::endl; \
       std::cout << " " << e.what() << std::endl; \
+      UTest::retStatistics.failCount++; \
     } \
   } while (0)
 #endif /* __UTEST_UTEST_HPP__ */
diff --git a/utests/utest_helper.cpp b/utests/utest_helper.cpp
index b57b8dc..0925daf 100644
--- a/utests/utest_helper.cpp
+++ b/utests/utest_helper.cpp
@@ -262,9 +262,10 @@ cl_kernel_init(const char *file_name, const char *kernel_name, int format, const
       goto error;
     }
     prevFileName = file_name;
+
+    /* OCL requires to build the program even if it is created from a binary */
+    OCL_CALL (clBuildProgram, program, 1, &device, build_opt, NULL, NULL);
   }
-  /* OCL requires to build the program even if it is created from a binary */
-  OCL_CALL (clBuildProgram, program, 1, &device, build_opt, NULL, NULL);

   /* Create a kernel from the program */
   if (kernel)
diff --git a/utests/utest_run.cpp b/utests/utest_run.cpp
index cd4356a..8883ca8 100644
--- a/utests/utest_run.cpp
+++ b/utests/utest_run.cpp
@@ -106,6 +106,17 @@ int main(int argc, char *argv[])

         break;

+      case 'b':
+        try {
+          UTest::runAllBenchMark();
+        }
+        catch (Exception e){
+          std::cout << " " << e.what() << " [SUCCESS]" << std::endl;
+        }
+
+        break;
+
+
       case 'h':
       default:
         usage();
diff --git a/utests/vload_bench.cpp b/utests/vload_bench.cpp
new file mode 100644
index 0000000..3765996
--- /dev/null
+++ b/utests/vload_bench.cpp
@@ -0,0 +1,98 @@
+#include "utest_helper.hpp"
+#include <sys/time.h>
+
+#define N_ITERATIONS 10000
+
+#define T uint8_t
+template <typename T>
+static double vload_bench(const char *kernelFunc, uint32_t N, uint32_t offset, bool benchMode)
+{
+  const size_t n = benchMode ? (512 * 1024) : (8 * 1024);
+  struct timeval start, end;
+
+  // Setup kernel and buffers
+  std::string kernelName = kernelFunc + std::to_string(N);
+  OCL_CALL (cl_kernel_init, "vload_bench.cl", kernelName.c_str(), SOURCE, NULL);
+  //OCL_CREATE_KERNEL("compiler_array");
+  buf_data[0] = (T*) malloc(sizeof(T) * n);
+  for (uint32_t i = 0; i < n; ++i) ((T*)buf_data[0])[i] = i; //rand() & ((1LL << N) - 1);
+  OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(T), buf_data[0]);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+  free(buf_data[0]);
+  buf_data[0] = NULL;
+
+  // Run the kernel
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(uint32_t), &offset);
+  globals[0] = n / ((N + 1) & ~0x1);
+  locals[0] = 256;
+  if (benchMode)
+    gettimeofday(&start, NULL);
+  OCL_NDRANGE(1);
+  if (benchMode) {
+    OCL_FINISH();
+    gettimeofday(&end, NULL);
+    double elapsed = (end.tv_sec - start.tv_sec) * 1e6 + (end.tv_usec - start.tv_usec);
+    double bandwidth = (globals[0] * (N_ITERATIONS) * sizeof(T) * N) / elapsed;
+    printf("\t%2.1fGB/S\n", bandwidth/1000.);
+    return bandwidth;
+  } else {
+    // Check result
+    OCL_MAP_BUFFER(0);
+    OCL_MAP_BUFFER(1);
+    for (uint32_t i = 0; i < globals[0]; ++i) {
+      OCL_ASSERT(((T*)buf_data[0])[i + offset] == ((uint32_t*)buf_data[1])[i]);
+    }
+    return 0;
+  }
+}
+
+#define VLOAD_TEST(T, kT) \
+static void vload_test_ ##kT(void) \
+{ \
+  uint8_t vectorSize[] = {2, 3, 4, 8, 16}; \
+  for(uint32_t i = 0; i < sizeof(vectorSize) / sizeof(vectorSize[0]); i++) { \
+    for(uint32_t offset = 0; offset < vectorSize[i]; offset++) {\
+      (void)vload_bench<T>("vload_bench_1" #kT, vectorSize[i], offset, false); \
+    }\
+  } \
+}\
+MAKE_UTEST_FROM_FUNCTION_KEEP_PROGRAM(vload_test_ ##kT, true)
+
+#ifndef BUILD_BENCHMARK
+VLOAD_TEST(uint8_t, uchar)
+VLOAD_TEST(int8_t, char)
+VLOAD_TEST(uint16_t, ushort)
+VLOAD_TEST(int16_t, short)
+VLOAD_TEST(uint32_t, uint)
+VLOAD_TEST(int32_t, int)
+VLOAD_TEST(float, float)
+#endif
+
+#define VLOAD_BENCH(T, kT) \
+static int vload_bench_ ##kT(void) \
+{ \
+  uint8_t vectorSize[] = {2, 3, 4, 8, 16}; \
+  double totBandwidth = 0; \
+  unsigned int j = 0;\
+  printf("\n");\
+  for(uint32_t i = 0; i < sizeof(vectorSize) / sizeof(vectorSize[0]); i++, j++) { \
+    printf(" Vector size %d:\n", vectorSize[i]); \
+    uint32_t k = 0;\
+    double bandwidthForOneSize = 0;\
+    for(uint32_t offset = 0; offset < vectorSize[i]; offset++, k++) {\
+      printf("\tOffset %d :", offset); \
+      bandwidthForOneSize += vload_bench<T>("vload_bench_10000" #kT, vectorSize[i], offset, true); \
+    }\
+    totBandwidth += bandwidthForOneSize / k;\
+  } \
+  return totBandwidth/j;\
+}\
+MAKE_BENCHMARK_FROM_FUNCTION_KEEP_PROGRAM(vload_bench_ ##kT, true)
+
+#ifdef BUILD_BENCHMARK
+VLOAD_BENCH(uint8_t, uchar)
+VLOAD_BENCH(uint16_t, ushort)
+VLOAD_BENCH(uint32_t, uint)
+#endif