storeTopHalf(dest, c);
}
+ /*! Emit the 64-bit rounded half-add, (x+y+1)>>1, built from four scratch
+  *  registers retyped to GEN_TYPE_UD. dst(0) receives the result, dst(1..4)
+  *  are the scratch registers and src(0)/src(1) are the two operands. */
+ void GenContext::emitI64RHADDInstruction(const SelectionInstruction &insn) {
+ GenRegister dest = ra->genReg(insn.dst(0));
+ GenRegister x = ra->genReg(insn.src(0));
+ GenRegister y = ra->genReg(insn.src(1));
+ GenRegister a = ra->genReg(insn.dst(1));
+ GenRegister b = ra->genReg(insn.dst(2));
+ GenRegister c = ra->genReg(insn.dst(3));
+ GenRegister d = ra->genReg(insn.dst(4));
+ a.type = b.type = c.type = d.type = GEN_TYPE_UD;
+ // NOTE(review): loadBottomHalf/loadTopHalf presumably extract the low/high
+ // 32 bits of a 64-bit register -- confirm against their definitions.
+ loadBottomHalf(a, x);
+ loadBottomHalf(b, y);
+ // NOTE(review): addWithCarry(d, s0, s1) appears to compute d = s0 + s1 and
+ // leave the carry-out in s1 -- confirm; the ADDs below rely on b, c and d
+ // holding carries after each call.
+ addWithCarry(a, a, b);
+ // Add the rounding constant 1 to the low word and fold its carry into b.
+ p->MOV(c, GenRegister::immud(1));
+ addWithCarry(a, a, c);
+ p->ADD(b, b, c);
+ // High word: add b (presumably the carry out of the low word) first, then
+ // the other operand's high half, and accumulate both carries into b.
+ loadTopHalf(c, x);
+ loadTopHalf(d, y);
+ addWithCarry(c, c, b);
+ addWithCarry(c, c, d);
+ p->ADD(b, b, d);
+ // Shift the {b, c, a} value right by one: bit 0 of c moves into bit 31 of
+ // a, and bit 0 of b moves into bit 31 of c.
+ p->SHR(a, a, GenRegister::immud(1));
+ p->SHL(d, c, GenRegister::immud(31));
+ p->OR(a, a, d);
+ p->SHR(c, c, GenRegister::immud(1));
+ p->SHL(d, b, GenRegister::immud(31));
+ p->OR(c, c, d);
+ storeBottomHalf(dest, a);
+ storeTopHalf(dest, c);
+ }
+
void GenContext::emitI64ShiftInstruction(const SelectionInstruction &insn) {
GenRegister dest = ra->genReg(insn.dst(0));
GenRegister x = ra->genReg(insn.src(0));
void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
void emitTernaryInstruction(const SelectionInstruction &insn);
void emitI64HADDInstruction(const SelectionInstruction &insn);
+ void emitI64RHADDInstruction(const SelectionInstruction &insn);
void emitI64ShiftInstruction(const SelectionInstruction &insn);
void emitI64CompareInstruction(const SelectionInstruction &insn);
void emitI64ToFloatInstruction(const SelectionInstruction &insn);
DECL_GEN7_SCHEDULE(Ternary, 20, 4, 2)
DECL_GEN7_SCHEDULE(I64Shift, 20, 4, 2)
DECL_GEN7_SCHEDULE(I64HADD, 20, 4, 2)
+DECL_GEN7_SCHEDULE(I64RHADD, 20, 4, 2)
DECL_GEN7_SCHEDULE(I64ToFloat, 20, 4, 2)
DECL_GEN7_SCHEDULE(Compare, 20, 4, 2)
DECL_GEN7_SCHEDULE(I64Compare, 20, 4, 2)
void CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[3]);
/*! (x+y)>>1 without mod. overflow */
void I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]);
+ /*! (x+y+1)>>1, computed without modular (wrap-around) overflow of the sum */
+ void I64RHADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]);
/*! Shift a 64-bit integer */
void I64Shift(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, GenRegister tmp[6]);
/*! Compare 64-bit integer */
insn->dst(i + 1) = tmp[i];
}
+ /*! Append a SEL_OP_I64RHADD selection instruction. dst(0) is the result,
+  *  src(0)/src(1) are the operands and dst(1..4) carry the four temporaries. */
+ void Selection::Opaque::I64RHADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]) {
+   SelectionInstruction *rhadd = this->appendInsn(SEL_OP_I64RHADD, 5, 2);
+   rhadd->dst(0) = dst;
+   rhadd->src(0) = src0;
+   rhadd->src(1) = src1;
+   // Destination slots 1..4 map onto tmp[0..3].
+   for (int slot = 1; slot <= 4; ++slot) {
+     rhadd->dst(slot) = tmp[slot - 1];
+   }
+ }
+
void Selection::Opaque::I64Shift(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) {
SelectionInstruction *insn = this->appendInsn(opcode, 7, 2);
insn->dst(0) = dst;
sel.I64HADD(dst, src0, src1, tmp);
break;
}
+ case OP_I64RHADD:
+ {
+   // Reserve four FAMILY_DWORD registers as scratch space for the
+   // 64-bit rounded half-add and hand them to the selection helper.
+   GenRegister scratch[4];
+   for (int n = 0; n < 4; ++n) {
+     scratch[n] = sel.selReg(sel.reg(FAMILY_DWORD));
+   }
+   sel.I64RHADD(dst, src0, src1, scratch);
+   break;
+ }
case OP_UPSAMPLE_SHORT:
sel.UPSAMPLE_SHORT(dst, src0, src1);
break;
DECL_SELECTION_IR(HADD, BinaryWithTempInstruction)
DECL_SELECTION_IR(RHADD, BinaryWithTempInstruction)
DECL_SELECTION_IR(I64HADD, I64HADDInstruction)
+DECL_SELECTION_IR(I64RHADD, I64RHADDInstruction)
DECL_SELECTION_IR(UPSAMPLE_SHORT, BinaryInstruction)
DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction)
DECL_SELECTION_IR(UPSAMPLE_LONG, BinaryInstruction)
DECL_EMIT_FUNCTION(HADD)
DECL_EMIT_FUNCTION(RHADD)
DECL_EMIT_FUNCTION(I64HADD)
+ DECL_EMIT_FUNCTION(I64RHADD)
#undef DECL_EMIT_FUNCTION
Instruction RHADD(Type type, Register dst, Register src0, Register src1);
/*! i64hadd.type dst src */
Instruction I64HADD(Type type, Register dst, Register src0, Register src1);
+ /*! i64rhadd.type dst src */
+ Instruction I64RHADD(Type type, Register dst, Register src0, Register src1);
/*! tan.type dst src */
Instruction RCP(Type type, Register dst, Register src);
/*! abs.type dst src */
DECL_INSN(HADD, BinaryInstruction)
DECL_INSN(RHADD, BinaryInstruction)
DECL_INSN(I64HADD, BinaryInstruction)
+DECL_INSN(I64RHADD, BinaryInstruction)
DECL_INSN(UPSAMPLE_SHORT, BinaryInstruction)
DECL_INSN(UPSAMPLE_INT, BinaryInstruction)
DECL_INSN(UPSAMPLE_LONG, BinaryInstruction)
case GEN_OCL_HADD:
case GEN_OCL_RHADD:
case GEN_OCL_I64HADD:
+ case GEN_OCL_I64RHADD:
this->newRegister(&I);
break;
default:
ctx.RHADD(getUnsignedType(ctx, I.getType()), dst, src0, src1);
break;
}
+ case GEN_OCL_I64RHADD:
+ {
+   // Consume the two call operands in order, asserting each one exists,
+   // then emit the IR instruction typed as TYPE_U64.
+   GBE_ASSERT(AI != AE);
+   const ir::Register lhs = this->getRegister(*AI);
+   ++AI;
+   GBE_ASSERT(AI != AE);
+   const ir::Register rhs = this->getRegister(*AI);
+   ++AI;
+   const ir::Register result = this->getRegister(&I);
+   ctx.I64RHADD(ir::TYPE_U64, result, lhs, rhs);
+   break;
+ }
default: break;
}
}
DECL_LLVM_GEN_FUNCTION(FBL, __gen_ocl_fbl)
DECL_LLVM_GEN_FUNCTION(ABS, __gen_ocl_abs)
DECL_LLVM_GEN_FUNCTION(HADD, _Z14__gen_ocl_haddjj)
-DECL_LLVM_GEN_FUNCTION(RHADD, __gen_ocl_rhadd)
+DECL_LLVM_GEN_FUNCTION(RHADD, _Z15__gen_ocl_rhaddjj)
DECL_LLVM_GEN_FUNCTION(I64HADD, _Z14__gen_ocl_haddmm)
+DECL_LLVM_GEN_FUNCTION(I64RHADD, _Z15__gen_ocl_rhaddmm)
DECL_LLVM_GEN_FUNCTION(UPSAMPLE_SHORT, _Z18__gen_ocl_upsampless)
DECL_LLVM_GEN_FUNCTION(UPSAMPLE_INT, _Z18__gen_ocl_upsampleii)
DECL_LLVM_GEN_FUNCTION(UPSAMPLE_LONG, _Z18__gen_ocl_upsamplell)
}
OVERLOADABLE uint __gen_ocl_hadd(uint x, uint y);
-PURE CONST uint __gen_ocl_rhadd(uint x, uint y);
+OVERLOADABLE uint __gen_ocl_rhadd(uint x, uint y);
#define DEC DEF(char); DEF(uchar); DEF(short); DEF(ushort)
#define DEF(type) INLINE_OVERLOADABLE type hadd(type x, type y) { return (x + y) >> 1; }
DEC
__gen_ocl_hadd((uint)x, (uint)y);
}
INLINE_OVERLOADABLE uint hadd(uint x, uint y) { return __gen_ocl_hadd(x, y); }
-INLINE_OVERLOADABLE int rhadd(int x, int y) { return (x < 0 && y > 0) || (x > 0 && y < 0) ? ((x + y + 1) >> 1) : __gen_ocl_rhadd(x, y); }
+/* Rounded-up average (x + y + 1) >> 1 of two signed 32-bit values.
+ * Operands of opposite sign cannot overflow in signed arithmetic, so they are
+ * averaged directly. Operands sharing a sign are forwarded to the uint
+ * builtin, which is assumed to keep the carry bit of the sum; its bit pattern
+ * then equals the signed result. Zero is grouped with the non-negative
+ * values: rhadd(0, -1) must take the signed path, otherwise the unsigned
+ * sum 0 + 0xFFFFFFFF + 1 would produce 0x80000000 instead of 0. */
+INLINE_OVERLOADABLE int rhadd(int x, int y) {
+  return ((x < 0) != (y < 0)) ?
+         ((x + y + 1) >> 1) :
+         __gen_ocl_rhadd((uint)x, (uint)y);
+ }
INLINE_OVERLOADABLE uint rhadd(uint x, uint y) { return __gen_ocl_rhadd(x, y); }
OVERLOADABLE ulong __gen_ocl_hadd(ulong x, ulong y);
+OVERLOADABLE ulong __gen_ocl_rhadd(ulong x, ulong y);
INLINE_OVERLOADABLE long hadd(long x, long y) {
return (x < 0 && y > 0) || (x > 0 && y < 0) ?
((x + y) >> 1) :
return __gen_ocl_hadd(x, y);
}
INLINE_OVERLOADABLE long rhadd(long x, long y) {
- return 0;
+  /* Rounded-up average (x + y + 1) >> 1 of two signed 64-bit values.
+   * Operands of opposite sign cannot overflow in signed arithmetic, so they
+   * are averaged directly. Operands sharing a sign are forwarded to the ulong
+   * builtin, which is assumed to keep the carry bit of the sum; its bit
+   * pattern then equals the signed result. Zero is grouped with the
+   * non-negative values: rhadd(0, -1) must take the signed path, otherwise
+   * the unsigned sum would wrap to the most negative long instead of 0. */
+  return ((x < 0) != (y < 0)) ?
+         ((x + y + 1) >> 1) :
+         __gen_ocl_rhadd((ulong)x, (ulong)y);
}
INLINE_OVERLOADABLE ulong rhadd(ulong x, ulong y) {
- return 0;
+ /* Unsigned 64-bit rounded half-add: forwards both operands unchanged to
+  * the ulong overload of __gen_ocl_rhadd. */
+ return __gen_ocl_rhadd(x, y);
}
int __gen_ocl_abs(int x);