support converting shorter int to 64bit int
authorHomer Hsing <homer.xing@intel.com>
Mon, 12 Aug 2013 02:12:16 +0000 (10:12 +0800)
committerZhigang Gong <zhigang.gong@linux.intel.com>
Mon, 12 Aug 2013 08:36:14 +0000 (16:36 +0800)
converting byte/word/dword to int64
also add test case
v2: define temporary reg as dest reg of instruction

Signed-off-by: Homer Hsing <homer.xing@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
backend/src/backend/gen_context.cpp
backend/src/backend/gen_insn_selection.cpp
backend/src/backend/gen_insn_selection.hxx
backend/src/backend/gen_register.hpp
kernels/compiler_long_convert.cl [new file with mode: 0644]
utests/CMakeLists.txt
utests/compiler_long_convert.cpp [new file with mode: 0644]

index 76e6b68..4d6da8c 100644 (file)
@@ -162,6 +162,31 @@ namespace gbe
       case SEL_OP_MOV_DF:
         p->MOV_DF(dst, src, tmp);
         break;
+      case SEL_OP_CONVI_TO_I64: {
+        GenRegister middle;
+        if (src.type == GEN_TYPE_B || src.type == GEN_TYPE_D) {
+          middle = tmp;
+          middle.type = src.is_signed_int() ? GEN_TYPE_D : GEN_TYPE_UD;
+          p->MOV(middle, src);
+        } else {
+          middle = src;
+        }
+        int execWidth = p->curr.execWidth;
+        p->push();
+        p->curr.execWidth = 8;
+        for (int nib = 0; nib < execWidth / 4; nib ++) {
+          p->curr.chooseNib(nib);
+          p->MOV(dst.bottom_half(), middle);
+          if(middle.is_signed_int())
+            p->ASR(dst.top_half(), middle, GenRegister::immud(31));
+          else
+            p->MOV(dst.top_half(), GenRegister::immd(0));
+          dst = GenRegister::suboffset(dst, 4);
+          middle = GenRegister::suboffset(middle, 4);
+        }
+        p->pop();
+        break;
+      }
       default:
         NOT_IMPLEMENTED;
     }
index 06d1902..46da37f 100644 (file)
@@ -455,6 +455,7 @@ namespace gbe
     ALU2WithTemp(RHADD)
     ALU2(UPSAMPLE_SHORT)
     ALU2(UPSAMPLE_INT)
+    ALU1WithTemp(CONVI_TO_I64)
 #undef ALU1
 #undef ALU1WithTemp
 #undef ALU2
@@ -2257,6 +2258,14 @@ namespace gbe
       } else if (dst.isdf()) {
         ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
         sel.MOV_DF(dst, src, sel.selReg(r));
+      } else if (dst.isint64()) {
+        switch(src.type) {
+          case GEN_TYPE_F:
+          case GEN_TYPE_DF:
+            NOT_IMPLEMENTED;
+          default:
+            sel.CONVI_TO_I64(dst, src, sel.selReg(sel.reg(FAMILY_DWORD)));
+        }
       } else
         sel.MOV(dst, src);
       return true;
index 5660078..9e24dd9 100644 (file)
@@ -58,3 +58,4 @@ DECL_SELECTION_IR(HADD, BinaryWithTempInstruction)
 DECL_SELECTION_IR(RHADD, BinaryWithTempInstruction)
 DECL_SELECTION_IR(UPSAMPLE_SHORT, BinaryInstruction)
 DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction)
+DECL_SELECTION_IR(CONVI_TO_I64, UnaryWithTempInstruction)
index 2cad4c0..c953319 100644 (file)
@@ -273,6 +273,12 @@ namespace gbe
       return r;
     }
 
+    INLINE bool is_signed_int(void) const {
+      if ((type == GEN_TYPE_B || type == GEN_TYPE_W || type == GEN_TYPE_D || type == GEN_TYPE_L) && file == GEN_GENERAL_REGISTER_FILE)
+        return true;
+      return false;
+    }
+
     INLINE bool isdf(void) const {
       if (type == GEN_TYPE_DF && file == GEN_GENERAL_REGISTER_FILE)
         return true;
diff --git a/kernels/compiler_long_convert.cl b/kernels/compiler_long_convert.cl
new file mode 100644 (file)
index 0000000..f22914f
--- /dev/null
@@ -0,0 +1,7 @@
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+kernel void compiler_long_convert(global char *src1, global short *src2, global int *src3, global long *dst1, global long *dst2, global long *dst3) {
+  int i = get_global_id(0);
+  dst1[i] = src1[i];
+  dst2[i] = src2[i];
+  dst3[i] = src3[i];
+}
index e7d3e72..3922220 100644 (file)
@@ -125,6 +125,7 @@ set (utests_sources
   compiler_double_4.cpp
   compiler_long.cpp
   compiler_long_2.cpp
+  compiler_long_convert.cpp
   utest_assert.cpp
   utest.cpp
   utest_file_map.cpp
diff --git a/utests/compiler_long_convert.cpp b/utests/compiler_long_convert.cpp
new file mode 100644 (file)
index 0000000..18e13ee
--- /dev/null
@@ -0,0 +1,67 @@
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include "utest_helper.hpp"
+
+void compiler_long_convert(void)
+{
+  const size_t n = 16;
+  char src1[n];
+  short src2[n];
+  int src3[n];
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_long_convert");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(char), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(short), NULL);
+  OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int), NULL);
+  OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(int64_t), NULL);
+  OCL_CREATE_BUFFER(buf[4], 0, n * sizeof(int64_t), NULL);
+  OCL_CREATE_BUFFER(buf[5], 0, n * sizeof(int64_t), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+  OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]);
+  OCL_SET_ARG(4, sizeof(cl_mem), &buf[4]);
+  OCL_SET_ARG(5, sizeof(cl_mem), &buf[5]);
+  globals[0] = n;
+  locals[0] = 16;
+
+  // Run random tests
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    src1[i] = -i;
+    src2[i] = -i;
+    src3[i] = -i;
+  }
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  OCL_MAP_BUFFER(2);
+  memcpy(buf_data[0], src1, sizeof(src1));
+  memcpy(buf_data[1], src2, sizeof(src2));
+  memcpy(buf_data[2], src3, sizeof(src3));
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+  OCL_UNMAP_BUFFER(2);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Compare
+  OCL_MAP_BUFFER(3);
+  OCL_MAP_BUFFER(4);
+  OCL_MAP_BUFFER(5);
+  int64_t *dst1 = ((int64_t *)buf_data[3]);
+  int64_t *dst2 = ((int64_t *)buf_data[4]);
+  int64_t *dst3 = ((int64_t *)buf_data[5]);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    //printf("%lx %lx %lx\n", dst1[i], dst2[i], dst3[i]);
+    OCL_ASSERT(dst1[i] == -(int64_t)i);
+    OCL_ASSERT(dst2[i] == -(int64_t)i);
+    OCL_ASSERT(dst3[i] == -(int64_t)i);
+  }
+  OCL_UNMAP_BUFFER(3);
+  OCL_UNMAP_BUFFER(4);
+  OCL_UNMAP_BUFFER(5);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_long_convert);