GBE: support getelementptr with ConstantExpr operand
authorGuo Yejun <yejun.guo@intel.com>
Wed, 26 Feb 2014 22:54:26 +0000 (06:54 +0800)
committerZhigang Gong <zhigang.gong@intel.com>
Fri, 28 Feb 2014 05:33:28 +0000 (13:33 +0800)
Add support during LLVM IR -> Gen IR period when the
first operand of getelementptr is ConstantExpr.

utest is also added.

Signed-off-by: Guo Yejun <yejun.guo@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
backend/src/llvm/llvm_gen_backend.cpp
kernels/compiler_getelementptr_bitcast.cl [new file with mode: 0644]
utests/CMakeLists.txt
utests/compiler_getelementptr_bitcast.cpp [new file with mode: 0644]

index 4d6b0c7..4b692e2 100644 (file)
@@ -965,7 +965,11 @@ namespace gbe
         }
 
         ir::Register pointer_reg;
-        pointer_reg = regTranslator.getScalar(pointer, elemID);
+        if(isa<ConstantExpr>(pointer))
+          pointer_reg = getConstantRegister(dyn_cast<Constant>(pointer), elemID);
+        else
+          pointer_reg = regTranslator.getScalar(pointer, elemID);
+
         ir::Register offset_reg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD);
         ctx.LOADI(ir::Type::TYPE_S32, offset_reg, ctx.newIntegerImmediate(constantOffset, ir::Type::TYPE_S32));
         ir::Register reg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD);
diff --git a/kernels/compiler_getelementptr_bitcast.cl b/kernels/compiler_getelementptr_bitcast.cl
new file mode 100644 (file)
index 0000000..0320abf
--- /dev/null
@@ -0,0 +1,18 @@
+__kernel void compiler_getelementptr_bitcast(global float *src, global float *dst)
+{
+  int i = get_global_id(0);
+
+  __local  float ldata[256];
+  ldata[get_local_id(0)] = src[i];
+
+  //if use get_local_id(0) to index ldata, the issue is not reproduced
+  //so, just set the work group as 1 in the application
+  __local uchar *  pldata = (__local uchar *)&ldata[0];
+  uchar data;
+  for(int k = 0; k < 3; k++){
+    data = *pldata;
+    pldata++;
+  }
+
+  dst[i] = data;
+}
index b7d6f71..0488578 100644 (file)
@@ -167,6 +167,7 @@ set (utests_sources
   compiler_function_argument3.cpp
   compiler_bool_cross_basic_block.cpp
   compiler_private_data_overflow.cpp
+  compiler_getelementptr_bitcast.cpp
   load_program_from_bin.cpp
   enqueue_copy_buf.cpp
   utest_assert.cpp
diff --git a/utests/compiler_getelementptr_bitcast.cpp b/utests/compiler_getelementptr_bitcast.cpp
new file mode 100644 (file)
index 0000000..a57ff36
--- /dev/null
@@ -0,0 +1,45 @@
+#include "utest_helper.hpp"
+
+void compiler_getelementptr_bitcast(void)
+{
+  const size_t n = 16;
+  float cpu_dst[16], cpu_src[16];
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_getelementptr_bitcast");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = 16;
+
+  //must be 1 to pass the test, it is required by the special usage in the kernel
+  locals[0] = 1;
+
+  // Run random tests
+  for (uint32_t pass = 0; pass < 8; ++pass) {
+    OCL_MAP_BUFFER(0);
+    for (int32_t i = 0; i < (int32_t) n; ++i)
+      cpu_src[i] = ((float*)buf_data[0])[i] = .1f * (rand() & 15) - .75f;
+    OCL_UNMAP_BUFFER(0);
+
+    // Run the kernel on GPU
+    OCL_NDRANGE(1);
+
+    // Run on CPU
+    for (int32_t i = 0; i < (int32_t) n; ++i){
+      unsigned char* c = (unsigned char*)&cpu_src[i];
+      cpu_dst[i] = c[2];
+    }
+
+    // Compare
+    OCL_MAP_BUFFER(1);
+    for (int32_t i = 0; i < (int32_t) n; ++i){
+      //printf("src:%f, gpu_dst: %f, cpu_dst: %f\n", cpu_src[i], ((float *)buf_data[1])[i], cpu_dst[i]);
+      OCL_ASSERT(((float *)buf_data[1])[i] == cpu_dst[i]);
+    }
+    OCL_UNMAP_BUFFER(1);
+  }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_getelementptr_bitcast);