From b918ba339bffcebcac52247665205238525de175 Mon Sep 17 00:00:00 2001 From: Guo Yejun Date: Thu, 27 Feb 2014 06:54:26 +0800 Subject: [PATCH] GBE: support getelementptr with ConstantExpr operand Add support during LLVM IR -> Gen IR period when the first operand of getelementptr is ConstantExpr. utest is also added. Signed-off-by: Guo Yejun Reviewed-by: Zhigang Gong --- backend/src/llvm/llvm_gen_backend.cpp | 6 ++++- kernels/compiler_getelementptr_bitcast.cl | 18 +++++++++++++ utests/CMakeLists.txt | 1 + utests/compiler_getelementptr_bitcast.cpp | 45 +++++++++++++++++++++++++++++++ 4 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 kernels/compiler_getelementptr_bitcast.cl create mode 100644 utests/compiler_getelementptr_bitcast.cpp diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 4d6b0c7..4b692e2 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -965,7 +965,11 @@ namespace gbe } ir::Register pointer_reg; - pointer_reg = regTranslator.getScalar(pointer, elemID); + if(isa(pointer)) + pointer_reg = getConstantRegister(dyn_cast(pointer), elemID); + else + pointer_reg = regTranslator.getScalar(pointer, elemID); + ir::Register offset_reg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD); ctx.LOADI(ir::Type::TYPE_S32, offset_reg, ctx.newIntegerImmediate(constantOffset, ir::Type::TYPE_S32)); ir::Register reg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD); diff --git a/kernels/compiler_getelementptr_bitcast.cl b/kernels/compiler_getelementptr_bitcast.cl new file mode 100644 index 0000000..0320abf --- /dev/null +++ b/kernels/compiler_getelementptr_bitcast.cl @@ -0,0 +1,18 @@ +__kernel void compiler_getelementptr_bitcast(global float *src, global float *dst) +{ + int i = get_global_id(0); + + __local float ldata[256]; + ldata[get_local_id(0)] = src[i]; + + //if use get_local_id(0) to index ldata, the issue is not reproduced + //so, just set the work group as 1 in the application + __local uchar * pldata = (__local uchar *)&ldata[0]; + uchar data; + for(int k = 0; k < 3; k++){ + data = *pldata; + pldata++; + } + + dst[i] = data; +} diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index b7d6f71..0488578 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -167,6 +167,7 @@ set (utests_sources compiler_function_argument3.cpp compiler_bool_cross_basic_block.cpp compiler_private_data_overflow.cpp + compiler_getelementptr_bitcast.cpp load_program_from_bin.cpp enqueue_copy_buf.cpp utest_assert.cpp diff --git a/utests/compiler_getelementptr_bitcast.cpp b/utests/compiler_getelementptr_bitcast.cpp new file mode 100644 index 0000000..a57ff36 --- /dev/null +++ b/utests/compiler_getelementptr_bitcast.cpp @@ -0,0 +1,45 @@ +#include "utest_helper.hpp" + +void compiler_getelementptr_bitcast(void) +{ + const size_t n = 16; + float cpu_dst[16], cpu_src[16]; + + // Setup kernel and buffers + OCL_CREATE_KERNEL("compiler_getelementptr_bitcast"); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + globals[0] = 16; + + //must be 1 to pass the test, it is required by the special usage in the kernel + locals[0] = 1; + + // Run random tests + for (uint32_t pass = 0; pass < 8; ++pass) { + OCL_MAP_BUFFER(0); + for (int32_t i = 0; i < (int32_t) n; ++i) + cpu_src[i] = ((float*)buf_data[0])[i] = .1f * (rand() & 15) - .75f; + OCL_UNMAP_BUFFER(0); + + // Run the kernel on GPU + OCL_NDRANGE(1); + + // Run on CPU + for (int32_t i = 0; i < (int32_t) n; ++i){ + unsigned char* c = (unsigned char*)&cpu_src[i]; + cpu_dst[i] = c[2]; + } + + // Compare + OCL_MAP_BUFFER(1); + for (int32_t i = 0; i < (int32_t) n; ++i){ + //printf("src:%f, gpu_dst: %f, cpu_dst: %f\n", cpu_src[i], ((float *)buf_data[1])[i], cpu_dst[i]); + OCL_ASSERT(((float *)buf_data[1])[i] == cpu_dst[i]); + } + OCL_UNMAP_BUFFER(1); + } +} + +MAKE_UTEST_FROM_FUNCTION(compiler_getelementptr_bitcast); -- 2.7.4