From c4d1f4029965af26e7f1a0d3d22df1a8ecd4dd2e Mon Sep 17 00:00:00 2001 From: Zhigang Gong Date: Tue, 6 Aug 2013 12:01:46 +0800 Subject: [PATCH] GBE: enable double vector load/store support. We have some accurate problem for double calculation on GPU side. I have to change the test case for double type to add a tolerate error when check the double data result. Signed-off-by: Zhigang Gong Reviewed-by: "Xing, Homer" --- backend/src/llvm/llvm_gen_backend.cpp | 2 -- backend/src/ocl_stdlib.tmpl.h | 1 + kernels/compiler_vector_load_store.cl | 6 +++--- utests/compiler_vector_load_store.cpp | 12 ++++++++---- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index b5963ad..18448cf 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -2371,8 +2371,6 @@ namespace gbe // Scalar is easy. We neednot build register tuples if (isScalarType(llvmType) == true) { const ir::Type type = getType(ctx, llvmType); - //if(type == ir::TYPE_DOUBLE) // 64bit-float load(store) don't support SIMD16 - // OCL_SIMD_WIDTH = 8; const ir::Register values = this->getRegister(llvmValues); if (isLoad) ctx.LOAD(type, ptr, addrSpace, dwAligned, values); diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 33880ed..84f15ca 100644 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -959,6 +959,7 @@ DECL_UNTYPED_RW_ALL(uint) DECL_UNTYPED_RW_ALL(long) DECL_UNTYPED_RW_ALL(ulong) DECL_UNTYPED_RW_ALL(float) +DECL_UNTYPED_RW_ALL(double) #undef DECL_UNTYPED_RW_ALL #undef DECL_UNTYPED_RW_ALL_SPACE diff --git a/kernels/compiler_vector_load_store.cl b/kernels/compiler_vector_load_store.cl index 30f0e1e..320194e 100644 --- a/kernels/compiler_vector_load_store.cl +++ b/kernels/compiler_vector_load_store.cl @@ -1,5 +1,5 @@ /* test OpenCL 1.1 Vector Data Load/Store Functions (section 6.11.7) */ - +#pragma OPENCL EXTENSION cl_khr_fp64 : enable #define OFFSET2(type) (type ##2) {(type)1, (type)2} #define OFFSET3(type) (type ##3) {(type)1, (type)2, (type)3} #define OFFSET4(type) (type ##4) {(type)1, (type)2, (type)3, (type)4} @@ -24,10 +24,10 @@ __kernel void test_##type ##n(__global type *pin, \ TEST_TYPE(ushort,n)\ TEST_TYPE(int,n) \ TEST_TYPE(uint,n) \ - TEST_TYPE(float,n) + TEST_TYPE(float,n) \ + TEST_TYPE(double,n) #if 0 - TEST_TYPE(double,n) TEST_TYPE(long,n) TEST_TYPE(ulong,n) TEST_TYPE(half,n) diff --git a/utests/compiler_vector_load_store.cpp b/utests/compiler_vector_load_store.cpp index 79f284f..7deb7cb 100644 --- a/utests/compiler_vector_load_store.cpp +++ b/utests/compiler_vector_load_store.cpp @@ -1,4 +1,5 @@ #include "utest_helper.hpp" +#include template static void compiler_vector_load_store(int elemNum, const char *kernelName) { @@ -9,8 +10,8 @@ static void compiler_vector_load_store(int elemNum, const char *kernelName) buf_data[0] = (T*) malloc(sizeof(T) * n); for (uint32_t i = 0; i < n; ++i) ((T*)buf_data[0])[i] = i; - OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(float), buf_data[0]); - OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); + OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(T), buf_data[0]); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL); free(buf_data[0]); buf_data[0] = NULL; @@ -27,7 +28,10 @@ static void compiler_vector_load_store(int elemNum, const char *kernelName) for (uint32_t i = 0; i < n; ++i) { int shift = ((i % elemNum) + 1); - OCL_ASSERT(((T*)buf_data[1])[i] == (T)(((T*)buf_data[0])[i] + shift)); + if (strstr(kernelName, "double") == NULL) + OCL_ASSERT(((T*)buf_data[1])[i] == (T)(((T*)buf_data[0])[i] + shift)); + else + OCL_ASSERT((((T*)buf_data[1])[i] - ((T)((T*)buf_data[0])[i] + shift)) < 1e-5); } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); @@ -54,6 +58,6 @@ test_all_vector(uint16_t, ushort) test_all_vector(int32_t, int) test_all_vector(uint32_t, uint) test_all_vector(float, float) -//test_all_vector(double, double) +test_all_vector(double, double) //test_all_vector(int64_t, long) //test_all_vector(uint64_t, ulong) -- 2.7.4