p->pop();
}
+ void GenContext::UnsignedI64ToFloat(GenRegister dst, GenRegister high, GenRegister low, GenRegister tmp) {
+ p->MOV(dst, high);
+ p->MUL(dst, dst, GenRegister::immf(65536.f * 65536.f));
+ tmp.type = GEN_TYPE_F;
+ p->MOV(tmp, low);
+ p->ADD(dst, dst, tmp);
+ }
+
+ void GenContext::emitI64ToFloatInstruction(const SelectionInstruction &insn) {
+ GenRegister src = ra->genReg(insn.src(0));
+ GenRegister dest = ra->genReg(insn.dst(0));
+ GenRegister high = ra->genReg(insn.dst(1));
+ GenRegister low = ra->genReg(insn.dst(2));
+ GenRegister tmp = ra->genReg(insn.dst(3));
+ loadTopHalf(high, src);
+ loadBottomHalf(low, src);
+ if(!src.is_signed_int()) {
+ UnsignedI64ToFloat(dest, high, low, tmp);
+ } else {
+ p->push();
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.physicalFlag = 1;
+ p->curr.flag = 1;
+ p->curr.subFlag = 0;
+ p->CMP(GEN_CONDITIONAL_GE, high, GenRegister::immud(0x80000000));
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->NOT(high, high);
+ p->NOT(low, low);
+ p->MOV(tmp, GenRegister::immud(1));
+ addWithCarry(low, low, tmp);
+ p->ADD(high, high, tmp);
+ p->pop();
+ UnsignedI64ToFloat(dest, high, low, tmp);
+ p->push();
+ p->curr.physicalFlag = 1;
+ p->curr.flag = 1;
+ p->curr.subFlag = 0;
+ dest.type = GEN_TYPE_UD;
+ p->OR(dest, dest, GenRegister::immud(0x80000000));
+ p->pop();
+ }
+ }
+
void GenContext::emitI64CompareInstruction(const SelectionInstruction &insn) {
GenRegister src0 = ra->genReg(insn.src(0));
GenRegister src1 = ra->genReg(insn.src(1));
int execWidth = p->curr.execWidth;
GenRegister acc0 = GenRegister::retype(GenRegister::acc(), GEN_TYPE_D);
p->push();
- p->curr.predicate = GEN_PREDICATE_NONE;
p->curr.execWidth = 8;
p->ADDC(dest, src0, src1);
p->MOV(src1, acc0);
if (execWidth == 16) {
+ p->curr.quarterControl = 1;
p->ADDC(GenRegister::suboffset(dest, 8),
GenRegister::suboffset(src0, 8),
GenRegister::suboffset(src1, 8));
void subWithBorrow(GenRegister dest, GenRegister src0, GenRegister src1);
void I32FullMult(GenRegister high, GenRegister low, GenRegister src0, GenRegister src1);
void saveFlag(GenRegister dest, int flag, int subFlag);
+ void UnsignedI64ToFloat(GenRegister dst, GenRegister high, GenRegister low, GenRegister tmp);
/*! Final Gen ISA emission helper functions */
void emitLabelInstruction(const SelectionInstruction &insn);
void emitI64HADDInstruction(const SelectionInstruction &insn);
void emitI64ShiftInstruction(const SelectionInstruction &insn);
void emitI64CompareInstruction(const SelectionInstruction &insn);
+ void emitI64ToFloatInstruction(const SelectionInstruction &insn);
void emitCompareInstruction(const SelectionInstruction &insn);
void emitJumpInstruction(const SelectionInstruction &insn);
void emitIndirectMoveInstruction(const SelectionInstruction &insn);
DECL_GEN7_SCHEDULE(Ternary, 20, 4, 2)
DECL_GEN7_SCHEDULE(I64Shift, 20, 4, 2)
DECL_GEN7_SCHEDULE(I64HADD, 20, 4, 2)
+DECL_GEN7_SCHEDULE(I64ToFloat, 20, 4, 2)
DECL_GEN7_SCHEDULE(Compare, 20, 4, 2)
DECL_GEN7_SCHEDULE(I64Compare, 20, 4, 2)
DECL_GEN7_SCHEDULE(Jump, 14, 1, 1)
#undef ALU2WithTemp
#undef ALU3
#undef I64Shift
+ /*! Convert 64-bit integer to 32-bit float */
+ void CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[3]);
/*! (x+y)>>1 without mod. overflow */
void I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]);
/*! Shift a 64-bit integer */
insn->extra.function = conditional;
}
+ void Selection::Opaque::CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[3]) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_CONVI64_TO_F, 4, 1);
+ insn->dst(0) = dst;
+ insn->src(0) = src;
+ for(int i = 0; i < 3; i ++)
+ insn->dst(i + 1) = tmp[i];
+ }
+
void Selection::Opaque::I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]) {
SelectionInstruction *insn = this->appendInsn(SEL_OP_I64HADD, 5, 2);
insn->dst(0) = dst;
sel.MOV(dst, unpacked);
} else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) && srcFamily == FAMILY_QWORD) {
sel.CONVI64_TO_I(dst, src);
+ } else if (dstType == ir::TYPE_FLOAT && srcFamily == FAMILY_QWORD) {
+ GenRegister tmp[3];
+ for(int i=0; i<3; i++) {
+ tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
+ tmp[i].type = GEN_TYPE_UD;
+ }
+ sel.CONVI64_TO_F(dst, src, tmp);
} else if (dst.isdf()) {
ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
sel.MOV_DF(dst, src, sel.selReg(r));
DECL_SELECTION_IR(UPSAMPLE_LONG, BinaryInstruction)
DECL_SELECTION_IR(CONVI_TO_I64, UnaryWithTempInstruction)
DECL_SELECTION_IR(CONVI64_TO_I, UnaryInstruction)
+DECL_SELECTION_IR(CONVI64_TO_F, I64ToFloatInstruction)
Type *llvmSrcType = I.getOperand(0)->getType();
const ir::Type dstType = getType(ctx, llvmDstType);
ir::Type srcType;
- if (I.getOpcode() == Instruction::ZExt) {
+ if (I.getOpcode() == Instruction::ZExt || I.getOpcode() == Instruction::UIToFP) {
srcType = getUnsignedType(ctx, llvmSrcType);
} else {
srcType = getType(ctx, llvmSrcType);
dst2[i] = src[i];
dst3[i] = src[i];
}
+
+kernel void compiler_long_convert_to_float(global float *dst, global long *src) {
+ int i = get_global_id(0);
+ dst[i] = src[i];
+}
}
MAKE_UTEST_FROM_FUNCTION(compiler_long_convert_2);
+
+// convert 64-bit integer to 32-bit float
+void compiler_long_convert_to_float(void)
+{
+ const size_t n = 16;
+ int64_t src[n];
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL_FROM_FILE("compiler_long_convert", "compiler_long_convert_to_float");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int64_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = n;
+ locals[0] = 16;
+
+ // Run random tests
+ for (int32_t i = 0; i < (int32_t) n; ++i) {
+ src[i] = -(int64_t)i;
+ }
+ OCL_MAP_BUFFER(1);
+ memcpy(buf_data[1], src, sizeof(src));
+ OCL_UNMAP_BUFFER(1);
+
+ // Run the kernel on GPU
+ OCL_NDRANGE(1);
+
+ // Compare
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ float *dst = ((float *)buf_data[0]);
+ for (int32_t i = 0; i < (int32_t) n; ++i) {
+ //printf("%f\n", dst[i]);
+ OCL_ASSERT(dst[i] == src[i]);
+ }
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_long_convert_to_float);