case SEL_OP_RNDE: p->RNDE(dst, src); break;
case SEL_OP_RNDZ: p->RNDZ(dst, src); break;
case SEL_OP_LOAD_INT64_IMM: p->LOAD_INT64_IMM(dst, src.value.i64); break;
+ case SEL_OP_CONVI64_TO_I:
+ {
+ int execWidth = p->curr.execWidth;
+ GenRegister xsrc = src.bottom_half(), xdst = dst;
+ p->push();
+ p->curr.execWidth = 8;
+ for(int i = 0; i < execWidth/4; i ++) {
+ p->curr.chooseNib(i);
+ p->MOV(xdst, xsrc);
+ xdst = GenRegister::suboffset(xdst, 4);
+ xsrc = GenRegister::suboffset(xsrc, 8);
+ }
+ p->pop();
+ break;
+ }
default: NOT_IMPLEMENTED;
}
}
ALU2(UPSAMPLE_INT)
ALU2(UPSAMPLE_LONG)
ALU1WithTemp(CONVI_TO_I64)
+ ALU1(CONVI64_TO_I)
I64Shift(I64SHL)
I64Shift(I64SHR)
I64Shift(I64ASR)
const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
// We need two instructions to make the conversion
- if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && srcFamily == FAMILY_DWORD) {
+ if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && (srcFamily == FAMILY_DWORD || srcFamily == FAMILY_QWORD)) {
GenRegister unpacked;
if (dstFamily == FAMILY_WORD) {
const uint32_t type = TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
unpacked = GenRegister::unpacked_ub(sel.reg(FAMILY_DWORD));
unpacked = GenRegister::retype(unpacked, type);
}
- sel.MOV(unpacked, src);
+ if(srcFamily == FAMILY_QWORD) {
+ GenRegister tmp = sel.selReg(sel.reg(FAMILY_DWORD));
+ tmp.type = GEN_TYPE_D;
+ sel.CONVI64_TO_I(tmp, src);
+ sel.MOV(unpacked, tmp);
+ } else
+ sel.MOV(unpacked, src);
sel.MOV(dst, unpacked);
+ } else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) && srcFamily == FAMILY_QWORD) {
+ sel.CONVI64_TO_I(dst, src);
} else if (dst.isdf()) {
ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
sel.MOV_DF(dst, src, sel.selReg(r));
DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction)
DECL_SELECTION_IR(UPSAMPLE_LONG, BinaryInstruction)
DECL_SELECTION_IR(CONVI_TO_I64, UnaryWithTempInstruction)
+DECL_SELECTION_IR(CONVI64_TO_I, UnaryInstruction)
dst2[i] = src2[i];
dst3[i] = src3[i];
}
+
+kernel void compiler_long_convert_2(global char *dst1, global short *dst2, global int *dst3, global long *src) {
+ int i = get_global_id(0);
+ dst1[i] = src[i];
+ dst2[i] = src[i];
+ dst3[i] = src[i];
+}
#include <iostream>
#include "utest_helper.hpp"
+// convert shorter integer to 64-bit integer
void compiler_long_convert(void)
{
const size_t n = 16;
}
MAKE_UTEST_FROM_FUNCTION(compiler_long_convert);
+
+// convert 64-bit integer to shorter integer
+void compiler_long_convert_2(void)
+{
+ const size_t n = 16;
+ int64_t src[n];
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL_FROM_FILE("compiler_long_convert", "compiler_long_convert_2");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(char), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(short), NULL);
+ OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int), NULL);
+ OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(int64_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+ OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]);
+ globals[0] = n;
+ locals[0] = 16;
+
+ // Run random tests
+ for (int32_t i = 0; i < (int32_t) n; ++i) {
+ src[i] = -i;
+ }
+ OCL_MAP_BUFFER(3);
+ memcpy(buf_data[3], src, sizeof(src));
+ OCL_UNMAP_BUFFER(3);
+
+ // Run the kernel on GPU
+ OCL_NDRANGE(1);
+
+ // Compare
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ OCL_MAP_BUFFER(2);
+ char *dst1 = ((char *)buf_data[0]);
+ short *dst2 = ((short *)buf_data[1]);
+ int *dst3 = ((int *)buf_data[2]);
+ for (int32_t i = 0; i < (int32_t) n; ++i) {
+ //printf("%x %x %x\n", dst1[i], dst2[i], dst3[i]);
+ OCL_ASSERT(dst1[i] == -i);
+ OCL_ASSERT(dst2[i] == -i);
+ OCL_ASSERT(dst3[i] == -i);
+ }
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+ OCL_UNMAP_BUFFER(2);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_long_convert_2);