Make "logical shift right" work
author Homer Hsing <homer.xing@intel.com>
Fri, 1 Feb 2013 05:48:59 +0000 (13:48 +0800)
committer Zhigang Gong <zhigang.gong@linux.intel.com>
Wed, 10 Apr 2013 06:52:32 +0000 (14:52 +0800)
Before this fix, "logical shift right" gave a wrong (sign-extended) result:
  (0xFF000000U >> 24) == 0xFFFFFFFF

After this fix, the result is correct:
  (0xFF000000U >> 24) == 0xFF

Also add a test case that checks 128 random inputs (8 passes of 16 values).

This patch fixes the "component_transfer_linear" filter.

Signed-off-by: Homer Hsing <homer.xing@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
Tested-by: Zhigang Gong <zhigang.gong@linux.intel.com>
backend/src/llvm/llvm_gen_backend.cpp
kernels/compiler_shift_right.cl [new file with mode: 0644]
utests/CMakeLists.txt
utests/compiler_shift_right.cpp [new file with mode: 0644]

index 5c26f75..8e6188f 100644 (file)
@@ -1163,7 +1163,7 @@ namespace gbe
         case Instruction::Or:   ctx.OR(type, dst, src0, src1); break;
         case Instruction::Xor:  ctx.XOR(type, dst, src0, src1); break;
         case Instruction::Shl:  ctx.SHL(type, dst, src0, src1); break;
-        case Instruction::LShr: ctx.SHR(type, dst, src0, src1); break;
+        case Instruction::LShr: ctx.SHR(getUnsignedType(ctx, I.getType()), dst, src0, src1); break;
         case Instruction::AShr: ctx.ASR(type, dst, src0, src1); break;
         default: NOT_SUPPORTED;
       }
diff --git a/kernels/compiler_shift_right.cl b/kernels/compiler_shift_right.cl
new file mode 100644 (file)
index 0000000..c109170
--- /dev/null
@@ -0,0 +1,4 @@
+kernel void compiler_shift_right(global uint *src, global int *dst) {  // per work-item: dst[i] = src[i] >> 24
+    int i = get_global_id(0);  // element index = global id in dimension 0
+    dst[i] = src[i] >> 24;  // src is uint, so the shift must zero-fill; the result is then stored as int
+}
index a599241..c67cf34 100644 (file)
@@ -37,6 +37,7 @@ ADD_LIBRARY(utests SHARED
   compiler_multiple_kernels.cpp
   compiler_saturate.cpp
   compiler_saturate_sub.cpp
+  compiler_shift_right.cpp
   compiler_short_scatter.cpp
   compiler_sub_bytes.cpp
   compiler_sub_shorts.cpp
diff --git a/utests/compiler_shift_right.cpp b/utests/compiler_shift_right.cpp
new file mode 100644 (file)
index 0000000..b94cc46
--- /dev/null
@@ -0,0 +1,45 @@
+#include "utest_helper.hpp"
+
+typedef unsigned int uint;
+
+static void cpu(int global_id, uint *src, int *dst) {  // CPU reference: computes one element the same way the kernel does
+  dst[global_id] = src[global_id] >> 24;  // operand is unsigned int, so the shift zero-fills
+}
+
+void compiler_shift_right(void)  // utest: compares the kernel's `uint >> 24` against the cpu() reference
+{
+  const size_t n = 16;  // elements processed per pass
+  uint cpu_src[16];  // host copy of the inputs written to buf[0]
+  int cpu_dst[16];  // expected outputs, filled by cpu()
+
+  // Set up the kernel plus a uint input buffer (arg 0) and an int output buffer (arg 1), n elements each
+  OCL_CREATE_KERNEL("compiler_shift_right");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = 16;  // hard-coded to the same value as n; presumably the global work size -- confirm in utest_helper.hpp
+  locals[0] = 16;  // presumably the local work size -- confirm in utest_helper.hpp
+
+  // Run 8 passes of n random inputs each (8 * 16 = 128 values in total)
+  for (uint32_t pass = 0; pass < 8; ++pass) {
+    OCL_MAP_BUFFER(0);
+    for (int32_t i = 0; i < (int32_t) n; ++i)
+      cpu_src[i] = ((uint*)buf_data[0])[i] = 0x80000000 | rand();  // bit 31 forced on, so a sign-extending shift would give a different result
+    OCL_UNMAP_BUFFER(0);
+
+    // Run the kernel on the GPU (presumably a 1-D launch -- confirm OCL_NDRANGE in utest_helper.hpp)
+    OCL_NDRANGE(1);
+
+    // Compute the expected values on the CPU from the same inputs
+    for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst);
+
+    // Compare every kernel output element with the CPU reference
+    OCL_MAP_BUFFER(1);
+    for (int32_t i = 0; i < (int32_t) n; ++i)
+      OCL_ASSERT(((int *)buf_data[1])[i] == cpu_dst[i]);
+    OCL_UNMAP_BUFFER(1);
+  }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_shift_right);